From f70efe54b97e95c369ab3f46cdbed8b5608e36d7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Feb 2024 12:39:31 -0800 Subject: [PATCH 0001/1418] work around gcc bugs with 'asm goto' with outputs commit 68fb3ca0e408e00db1c3f8fccdfa19e274c033be upstream. We've had issues with gcc and 'asm goto' before, and we created a 'asm_volatile_goto()' macro for that in the past: see commits 3f0116c3238a ("compiler/gcc4: Add quirk for 'asm goto' miscompilation bug") and a9f180345f53 ("compiler/gcc4: Make quirk for asm_volatile_goto() unconditional"). Then, much later, we ended up removing the workaround in commit 43c249ea0b1e ("compiler-gcc.h: remove ancient workaround for gcc PR 58670") because we no longer supported building the kernel with the affected gcc versions, but we left the macro uses around. Now, Sean Christopherson reports a new version of a very similar problem, which is fixed by re-applying that ancient workaround. But the problem in question is limited to only the 'asm goto with outputs' cases, so instead of re-introducing the old workaround as-is, let's rename and limit the workaround to just that much less common case. It looks like there are at least two separate issues that all hit in this area: (a) some versions of gcc don't mark the asm goto as 'volatile' when it has outputs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 which is easy to work around by just adding the 'volatile' by hand. (b) Internal compiler errors: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 which are worked around by adding the extra empty 'asm' as a barrier, as in the original workaround. but the problem Sean sees may be a third thing since it involves bad code generation (not an ICE) even with the manually added 'volatile'. The same old workaround works for this case, even if this feels a bit like voodoo programming and may only be hiding the issue. Reported-and-tested-by: Sean Christopherson Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Cc: Nick Desaulniers Cc: Uros Bizjak Cc: Jakub Jelinek Cc: Andrew Pinski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/jump_label.h | 4 ++-- arch/arm/include/asm/jump_label.h | 4 ++-- arch/arm64/include/asm/alternative-macros.h | 4 ++-- arch/arm64/include/asm/jump_label.h | 4 ++-- arch/csky/include/asm/jump_label.h | 4 ++-- arch/mips/include/asm/jump_label.h | 4 ++-- arch/parisc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/bug.h | 2 +- arch/powerpc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/uaccess.h | 8 ++++---- arch/powerpc/kernel/irq_64.c | 2 +- arch/riscv/include/asm/jump_label.h | 4 ++-- arch/s390/include/asm/jump_label.h | 4 ++-- arch/sparc/include/asm/jump_label.h | 4 ++-- arch/um/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +++--- arch/x86/include/asm/rmwcc.h | 2 +- arch/x86/include/asm/uaccess.h | 10 +++++----- arch/x86/include/asm/virtext.h | 12 ++++++------ arch/x86/kvm/svm/svm_ops.h | 6 +++--- arch/x86/kvm/vmx/vmx.c | 8 ++++---- arch/x86/kvm/vmx/vmx_ops.h | 6 +++--- arch/xtensa/include/asm/jump_label.h | 4 ++-- include/linux/compiler-gcc.h | 19 +++++++++++++++++++ include/linux/compiler_types.h | 4 ++-- net/netfilter/nft_set_pipapo_avx2.c | 2 +- samples/bpf/asm_goto_workaround.h | 8 ++++---- tools/arch/x86/include/asm/rmwcc.h | 2 +- tools/include/linux/compiler_types.h | 4 ++-- 30 files changed, 86 insertions(+), 67 deletions(-) diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index 9d96180797396..a339223d9e052 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -31,7 +31,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "nop \n" ".pushsection __jump_table, \"aw\" \n" @@ -47,7 +47,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "b %l[l_yes] \n" ".pushsection __jump_table, \"aw\" \n" diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc03..e4eb54f6cd9fe 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -11,7 +11,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(nop) "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -25,7 +25,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 3622e9f4fb442..51738c56e96cd 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -229,7 +229,7 @@ alternative_has_feature_likely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : : [feature] "i" (feature) @@ -247,7 +247,7 @@ alternative_has_feature_unlikely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) : : [feature] "i" (feature) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index cea441b6aa5dc..b5bd3c38a01b2 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" @@ -35,7 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h index 98a3f4b168bd2..ef2e37a10a0fe 100644 --- a/arch/csky/include/asm/jump_label.h +++ b/arch/csky/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop32 \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" @@ -29,7 +29,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: bsr32 %l[label] \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index c5c6864e64bc4..405c85173f2c1 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -36,7 +36,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + asm goto("1:\t" B_INSN " 2f\n\t" "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" @@ -50,7 +50,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" + asm goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 94428798b6aa6..317ebc5edc9fe 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" @@ -29,7 +29,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a4736355c24..20d5052e22925 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -74,7 +74,7 @@ ##__VA_ARGS__) #define WARN_ENTRY(insn, flags, label, ...) \ - asm_volatile_goto( \ + asm goto( \ "1: " insn "\n" \ EX_TABLE(1b, %l[label]) \ _EMIT_BUG_ENTRY \ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 93ce3ec253877..2f2a86ed2280a 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" @@ -32,7 +32,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 3ddc65c63a49e..45d4c9cf3f3a2 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -72,7 +72,7 @@ __pu_failed: \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -85,7 +85,7 @@ __pu_failed: \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ @@ -132,7 +132,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -145,7 +145,7 @@ do { \ __get_user_asm_goto(x, addr, label, "ld") #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: lwz%X1 %0, %1\n" \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 9dc0ad3c533a8..5a6e44e4d36f5 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -230,7 +230,7 @@ again: * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 14a5ea8d8ef0f..4a35d787c0191 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 895f774bbcc55..bf78cf381dfcd 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -25,7 +25,7 @@ */ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 0,%l[label]\n" + asm goto("0: brcl 0,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 15,%l[label]\n" + asm goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 94eb529dcb776..2718cbea826a7 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -10,7 +10,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" @@ -26,7 +26,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes]\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 4b6d1b526bc12..66fe06db872f0 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" + asm goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce0c8f7d32186..f835b328ba24f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto( + asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 071572e23d3a0..cbbef32517f00 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -24,7 +24,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); @@ -38,7 +38,7 @@ l_yes: static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); @@ -52,7 +52,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 7fa6112164172..1919ccf493cd1 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -18,7 +18,7 @@ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + asm goto (fullop "; j" #cc " %l[cc_label]" \ : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ if (0) { \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 6ca0c661cb637..c638535eedd55 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -155,7 +155,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -317,7 +317,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ @@ -397,7 +397,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -416,7 +416,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -499,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_goto(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 6c2e3ff3cb28f..724ce44809ed2 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -43,9 +43,9 @@ static inline int cpu_has_vmx(void) */ static inline int cpu_vmxoff(void) { - asm_volatile_goto("1: vmxoff\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "cc", "memory" : fault); + asm goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); cr4_clear_bits(X86_CR4_VMXE); return 0; @@ -129,9 +129,9 @@ static inline void cpu_svm_disable(void) * case, GIF must already be set, otherwise the NMI would have * been blocked, so just eat the fault. */ - asm_volatile_goto("1: stgi\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "memory" : fault); + asm goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); fault: wrmsrl(MSR_EFER, efer & ~EFER_SVME); } diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 36c8af87a707a..4e725854c63a1 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -8,7 +8,7 @@ #define svm_asm(insn, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) "\n\t" \ + asm goto("1: " __stringify(insn) "\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ ::: clobber : fault); \ return; \ @@ -18,7 +18,7 @@ fault: \ #define svm_asm1(insn, op1, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1 : clobber : fault); \ return; \ @@ -28,7 +28,7 @@ fault: \ #define svm_asm2(insn, op1, op2, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1, op2 : clobber : fault); \ return; \ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 98d732b9418f1..57c1374fdfd49 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2469,10 +2469,10 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) cr4_set_bits(X86_CR4_VMXE); - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" - _ASM_EXTABLE(1b, %l[fault]) - : : [vmxon_pointer] "m"(vmxon_pointer) - : : fault); + asm goto("1: vmxon %[vmxon_pointer]\n\t" + _ASM_EXTABLE(1b, %l[fault]) + : : [vmxon_pointer] "m"(vmxon_pointer) + : : fault); return 0; fault: diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index ec268df83ed67..5edab28dfb2ef 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -73,7 +73,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - asm_volatile_goto("1: vmread %[field], %[output]\n\t" + asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) @@ -166,7 +166,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) #define vmx_asm1(insn, op1, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ @@ -183,7 +183,7 @@ fault: \ #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index c812bf85021c0..46c8596259d2d 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "_nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, * make it reachable and wrap both into a no-transform block * to avoid any assembler interference with this. */ - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" ".begin no-transform\n\t" "_j %l[l_yes]\n\t" "2:\n\t" diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f55a37efdb974..e31243ea637c7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,25 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * + * Work it around via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236661f..a95ec8a3a7ca3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -284,8 +284,8 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 90e275bb3e5d7..a3a8ddca99189 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594d65..634e81d83efd9 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index 11ff975242cac..e2ff22b379a44 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 1bdd834bdd571..d09f9dc172a48 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,8 +36,8 @@ #include #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #endif /* __LINUX_COMPILER_TYPES_H */ -- GitLab From 3af7236d6dbffaeee7538b37e81de39dde4cd5dd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Feb 2024 11:14:33 -0800 Subject: [PATCH 0002/1418] update workarounds for gcc "asm goto" issue commit 68fb3ca0e408e00db1c3f8fccdfa19e274c033be upstream. In commit 4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") I did the gcc workaround unconditionally, because the cause of the bad code generation wasn't entirely clear. In the meantime, Jakub Jelinek debugged the issue, and has come up with a fix in gcc [2], which also got backported to the still maintained branches of gcc-11, gcc-12 and gcc-13. Note that while the fix technically wasn't in the original gcc-14 branch, Jakub says: "while it is true that no GCC 14 snapshots until today (or whenever the fix will be committed) have the fix, for GCC trunk it is up to the distros to use the latest snapshot if they use it at all and would allow better testing of the kernel code without the workaround, so that if there are other issues they won't be discovered years later. Most userland code doesn't actually use asm goto with outputs..." so we will consider gcc-14 to be fixed - if somebody is using gcc snapshots of the gcc-14 before the fix, they should upgrade. Note that while the bug goes back to gcc-11, in practice other gcc changes seem to have effectively hidden it since gcc-12.1 as per a bisect by Jakub. So even a gcc-14 snapshot without the fix likely doesn't show actual problems. Also, make the default 'asm_goto_output()' macro mark the asm as volatile by hand, because of an unrelated gcc issue [1] where it doesn't match the documented behavior ("asm goto is always volatile"). Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 [1] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 [2] Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Requested-by: Jakub Jelinek Cc: Uros Bizjak Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Andrew Pinski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 7 ++++--- include/linux/compiler_types.h | 9 ++++++++- init/Kconfig | 9 +++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e31243ea637c7..149a520515e1d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -69,10 +69,9 @@ /* * GCC 'asm goto' with outputs miscompiles certain code sequences: * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 * - * Work it around via the same compiler barrier quirk that we used + * Work around it via the same compiler barrier quirk that we used * to use for the old 'asm goto' workaround. * * Also, always mark such 'asm goto' statements as volatile: all @@ -82,8 +81,10 @@ * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND #define asm_goto_output(x...) \ do { asm volatile goto(x); asm (""); } while (0) +#endif #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a95ec8a3a7ca3..574b4121ebe3e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -284,8 +284,15 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ #ifndef asm_goto_output -#define asm_goto_output(x...) asm goto(x) +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/init/Kconfig b/init/Kconfig index 148704640252e..ffb927bf6034f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) +config GCC_ASM_GOTO_OUTPUT_WORKAROUND + bool + depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT + # Fixed in GCC 14, 13.3, 12.4 and 11.5 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) -- GitLab From 84b576ad44ea9c5149be6c288c46924490c94709 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:06 +0000 Subject: [PATCH 0003/1418] btrfs: add and use helper to check if block group is used commit 1693d5442c458ae8d5b0d58463b873cd879569ed upstream. Add a helper function to determine if a block group is being used and make use of it at btrfs_delete_unused_bgs(). This helper will also be used in future code changes. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 3 +-- fs/btrfs/block-group.h | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 08017b180a10d..efc6f03773eb3 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1375,8 +1375,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_lock(&block_group->lock); - if (block_group->reserved || block_group->pinned || - block_group->used || block_group->ro || + if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 47a2dcbfee255..bace40a006379 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -241,6 +241,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) return (block_group->start + block_group->length); } +static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg) +{ + lockdep_assert_held(&bg->lock); + + return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0); +} + static inline bool btrfs_is_block_group_data_only( struct btrfs_block_group *block_group) { -- GitLab From e717aecd2a430873edf63444543c768e42c6a91f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:14 +0000 Subject: [PATCH 0004/1418] btrfs: do not delete unused block group if it may be used soon commit f4a9f219411f318ae60d6ff7f129082a75686c6c upstream. Before deleting a block group that is in the list of unused block groups (fs_info->unused_bgs), we check if the block group became used before deleting it, as extents from it may have been allocated after it was added to the list. However even if the block group was not yet used, there may be tasks that have only reserved space and have not yet allocated extents, and they might be relying on the availability of the unused block group in order to allocate extents. The reservation works first by increasing the "bytes_may_use" field of the corresponding space_info object (which may first require flushing delayed items, allocating a new block group, etc), and only later a task does the actual allocation of extents. For metadata we usually don't end up using all reserved space, as we are pessimistic and typically account for the worst cases (need to COW every single node in a path of a tree at maximum possible height, etc). For data we usually reserve the exact amount of space we're going to allocate later, except when using compression where we always reserve space based on the uncompressed size, as compression is only triggered when writeback starts so we don't know in advance how much space we'll actually need, or if the data is compressible. So don't delete an unused block group if the total size of its space_info object minus the block group's size is less then the sum of used space and space that may be used (space_info->bytes_may_use), as that means we have tasks that reserved space and may need to allocate extents from the block group. In this case, besides skipping the deletion, re-add the block group to the list of unused block groups so that it may be reconsidered later, in case the tasks that reserved space end up not needing to allocate extents from it. Allowing the deletion of the block group while we have reserved space, can result in tasks failing to allocate metadata extents (-ENOSPC) while under a transaction handle, resulting in a transaction abort, or failure during writeback for the case of data extents. CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index efc6f03773eb3..9f77565bd7f5a 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1318,6 +1318,7 @@ out: */ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) { + LIST_HEAD(retry_list); struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; @@ -1339,6 +1340,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { + u64 used; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, @@ -1374,6 +1376,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) goto next; } + spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { @@ -1385,10 +1388,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) */ trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + up_write(&space_info->groups_sem); + goto next; + } + + /* + * The block group may be unused but there may be space reserved + * accounting with the existence of that block group, that is, + * space_info->bytes_may_use was incremented by a task but no + * space was yet allocated from the block group by the task. + * That space may or may not be allocated, as we are generally + * pessimistic about space reservation for metadata as well as + * for data when using compression (as we reserve space based on + * the worst case, when data can't be compressed, and before + * actually attempting compression, before starting writeback). + * + * So check if the total space of the space_info minus the size + * of this block group is less than the used space of the + * space_info - if that's the case, then it means we have tasks + * that might be relying on the block group in order to allocate + * extents, and add back the block group to the unused list when + * we finish, so that we retry later in case no tasks ended up + * needing to allocate extents from the block group. + */ + used = btrfs_space_info_used(space_info, true); + if (space_info->total_bytes - block_group->length < used) { + /* + * Add a reference for the list, compensate for the ref + * drop under the "next" label for the + * fs_info->unused_bgs list. + */ + btrfs_get_block_group(block_group); + list_add_tail(&block_group->bg_list, &retry_list); + + trace_btrfs_skip_unused_block_group(block_group); + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); /* We don't want to force the issue, only flip if it's ok. */ ret = inc_block_group_ro(block_group, 0); @@ -1512,12 +1554,16 @@ next: btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } + list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->unused_bgs); + spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); -- GitLab From a1a7b9589574792796efee9e8023087e2b4f8fa8 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 10 Jan 2024 17:51:26 -0800 Subject: [PATCH 0005/1418] btrfs: forbid creating subvol qgroups commit 0c309d66dacddf8ce939b891d9ead4a8e21ad6f0 upstream. Creating a qgroup 0/subvolid leads to various races and it isn't helpful, because you can't specify a subvol id when creating a subvol, so you can't be sure it will be the right one. Any requirements on the automatic subvol can be gratified by using a higher level qgroup and the inheritance parameters of subvol creation. Fixes: cecbb533b5fc ("btrfs: record simple quota deltas in delayed refs") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8516c70b5edc1..196e222749ccd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4695,6 +4695,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } + if (sa->create && is_fstree(sa->qgroupid)) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); -- GitLab From 66b317a2fc45b2ef66527ee3f8fa08fb5beab88d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 20 Jan 2024 19:41:28 +1030 Subject: [PATCH 0006/1418] btrfs: do not ASSERT() if the newly created subvolume already got read commit e03ee2fe873eb68c1f9ba5112fee70303ebf9dfb upstream. [BUG] There is a syzbot crash, triggered by the ASSERT() during subvolume creation: assertion failed: !anon_dev, in fs/btrfs/disk-io.c:1319 ------------[ cut here ]------------ kernel BUG at fs/btrfs/disk-io.c:1319! invalid opcode: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:btrfs_get_root_ref.part.0+0x9aa/0xa60 btrfs_get_new_fs_root+0xd3/0xf0 create_subvol+0xd02/0x1650 btrfs_mksubvol+0xe95/0x12b0 __btrfs_ioctl_snap_create+0x2f9/0x4f0 btrfs_ioctl_snap_create+0x16b/0x200 btrfs_ioctl+0x35f0/0x5cf0 __x64_sys_ioctl+0x19d/0x210 do_syscall_64+0x3f/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0x6b ---[ end trace 0000000000000000 ]--- [CAUSE] During create_subvol(), after inserting root item for the newly created subvolume, we would trigger btrfs_get_new_fs_root() to get the btrfs_root of that subvolume. The idea here is, we have preallocated an anonymous device number for the subvolume, thus we can assign it to the new subvolume. But there is really nothing preventing things like backref walk to read the new subvolume. If that happens before we call btrfs_get_new_fs_root(), the subvolume would be read out, with a new anonymous device number assigned already. In that case, we would trigger ASSERT(), as we really expect no one to read out that subvolume (which is not yet accessible from the fs). But things like backref walk is still possible to trigger the read on the subvolume. Thus our assumption on the ASSERT() is not correct in the first place. [FIX] Fix it by removing the ASSERT(), and just free the @anon_dev, reset it to 0, and continue. If the subvolume tree is read out by something else, it should have already get a new anon_dev assigned thus we only need to free the preallocated one. Reported-by: Chenyuan Yang Fixes: 2dfb1e43f57d ("btrfs: preallocate anon block device at first phase of snapshot creation") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 40152458e7b74..0d1b05ded1e35 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1662,8 +1662,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ + if (unlikely(anon_dev)) { + free_anon_bdev(anon_dev); + anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); -- GitLab From f98913c07cd87f54f7355fa3a0d762b7585c0ae3 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 10 Jan 2024 17:30:00 -0800 Subject: [PATCH 0007/1418] btrfs: forbid deleting live subvol qgroup commit a8df35619948bd8363d330c20a90c9a7fbff28c0 upstream. If a subvolume still exists, forbid deleting its qgroup 0/subvolid. This behavior generally leads to incorrect behavior in squotas and doesn't have a legitimate purpose. Fixes: cecbb533b5fc ("btrfs: record simple quota deltas in delayed refs") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 96ec9ccc2ef61..b3472bf6b288f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1635,6 +1635,15 @@ out: return ret; } +static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) +{ + return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || + qgroup->excl > 0 || qgroup->excl_cmpr > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); +} + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1654,6 +1663,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) goto out; } + if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { + ret = -EBUSY; + goto out; + } + /* Check if there are no children of this qgroup */ if (!list_empty(&qgroup->members)) { ret = -EBUSY; -- GitLab From dfd1f44e49585f0248b08a8e17201572909e001c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 10 Jan 2024 17:48:44 +0100 Subject: [PATCH 0008/1418] btrfs: send: return EOPNOTSUPP on unknown flags commit f884a9f9e59206a2d41f265e7e403f080d10b493 upstream. When some ioctl flags are checked we return EOPNOTSUPP, like for BTRFS_SCRUB_SUPPORTED_FLAGS, BTRFS_SUBVOL_CREATE_ARGS_MASK or fallocate modes. The EINVAL is supposed to be for a supported but invalid values or combination of options. Fix that when checking send flags so it's consistent with the rest. CC: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/linux-btrfs/CAL3q7H5rryOLzp3EKq8RTbjMHMHeaJubfpsVLF6H4qJnKCUR1w@mail.gmail.com/ Reviewed-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4a4d65b5e24f7..a75669972dc73 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7852,7 +7852,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { - ret = -EINVAL; + ret = -EOPNOTSUPP; goto out; } -- GitLab From 4d6b2e17b5504a41472deec3628b43285940b6da Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 31 Jan 2024 17:18:04 +0000 Subject: [PATCH 0009/1418] btrfs: don't reserve space for checksums when writing to nocow files commit feefe1f49d26bad9d8997096e3a200280fa7b1c5 upstream. Currently when doing a write to a file we always reserve metadata space for inserting data checksums. However we don't need to do it if we have a nodatacow file (-o nodatacow mount option or chattr +C) or if checksums are disabled (-o nodatasum mount option), as in that case we are only adding unnecessary pressure to metadata reservations. For example on x86_64, with the default node size of 16K, a 4K buffered write into a nodatacow file is reserving 655360 bytes of metadata space, as it's accounting for checksums. After this change, which stops reserving space for checksums if we have a nodatacow file or checksums are disabled, we only need to reserve 393216 bytes of metadata. CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delalloc-space.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index f2bc5563c0f92..63b7fa7067434 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -243,7 +243,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; u64 qgroup_rsv_size = 0; - u64 csum_leaves; unsigned outstanding_extents; lockdep_assert_held(&inode->lock); @@ -258,10 +257,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, outstanding_extents); reserve_size += btrfs_calc_metadata_size(fs_info, 1); } - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, - inode->csum_bytes); - reserve_size += btrfs_calc_insert_metadata_size(fs_info, - csum_leaves); + if (!(inode->flags & BTRFS_INODE_NODATASUM)) { + u64 csum_leaves; + + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); + reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); + } /* * For qgroup rsv, the calculation is very simple: * account one nodesize for each outstanding extent @@ -276,14 +277,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); } -static void calc_inode_reservations(struct btrfs_fs_info *fs_info, +static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 nr_extents = count_max_extents(fs_info, num_bytes); - u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + u64 csum_leaves; u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); + if (inode->flags & BTRFS_INODE_NODATASUM) + csum_leaves = 0; + else + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, nr_extents + csum_leaves); @@ -335,7 +342,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, * everything out and try again, which is bad. This way we just * over-reserve slightly, and clean up the mess when we are done. */ - calc_inode_reservations(fs_info, num_bytes, disk_num_bytes, + calc_inode_reservations(inode, num_bytes, disk_num_bytes, &meta_reserve, &qgroup_reserve); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, noflush); @@ -356,7 +363,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, spin_lock(&inode->lock); nr_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, nr_extents); - inode->csum_bytes += disk_num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); @@ -390,7 +398,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - inode->csum_bytes -= num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes -= num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); -- GitLab From 7ba7f9ed88a161091f2aa163370deb5ebc504524 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 2 Feb 2024 12:09:22 +0000 Subject: [PATCH 0010/1418] btrfs: reject encoded write if inode has nodatasum flag set commit 1bd96c92c6a0a4d43815eb685c15aa4b78879dc9 upstream. Currently we allow an encoded write against inodes that have the NODATASUM flag set, either because they are NOCOW files or they were created while the filesystem was mounted with "-o nodatasum". This results in having compressed extents without corresponding checksums, which is a filesystem inconsistency reported by 'btrfs check'. For example, running btrfs/281 with MOUNT_OPTIONS="-o nodatacow" triggers this and 'btrfs check' errors out with: [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots root 256 inode 257 errors 1040, bad file extent, some csum missing root 256 inode 258 errors 1040, bad file extent, some csum missing ERROR: errors found in fs roots (...) So reject encoded writes if the target inode has NODATASUM set. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82f92b5652a77..63d61fcd95e3b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10774,6 +10774,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE) return -EINVAL; + /* + * Compressed extents should always have checksums, so error out if we + * have a NOCOW file or inode was created while mounted with NODATASUM. + */ + if (inode->flags & BTRFS_INODE_NODATASUM) + return -EINVAL; + orig_count = iov_iter_count(from); /* The extent size must be sane. */ -- GitLab From 02f2b95b00bf57d20320ee168b30fb7f3db8e555 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 31 Jan 2024 14:27:25 -0500 Subject: [PATCH 0011/1418] btrfs: don't drop extent_map for free space inode on write error commit 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade upstream. While running the CI for an unrelated change I hit the following panic with generic/648 on btrfs_holes_spacecache. assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385 ------------[ cut here ]------------ kernel BUG at fs/btrfs/extent_io.c:1385! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1 RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0 Call Trace: extent_write_cache_pages+0x2ac/0x8f0 extent_writepages+0x87/0x110 do_writepages+0xd5/0x1f0 filemap_fdatawrite_wbc+0x63/0x90 __filemap_fdatawrite_range+0x5c/0x80 btrfs_fdatawrite_range+0x1f/0x50 btrfs_write_out_cache+0x507/0x560 btrfs_write_dirty_block_groups+0x32a/0x420 commit_cowonly_roots+0x21b/0x290 btrfs_commit_transaction+0x813/0x1360 btrfs_sync_file+0x51a/0x640 __x64_sys_fdatasync+0x52/0x90 do_syscall_64+0x9c/0x190 entry_SYSCALL_64_after_hwframe+0x6e/0x76 This happens because we fail to write out the free space cache in one instance, come back around and attempt to write it again. However on the second pass through we go to call btrfs_get_extent() on the inode to get the extent mapping. Because this is a new block group, and with the free space inode we always search the commit root to avoid deadlocking with the tree, we find nothing and return a EXTENT_MAP_HOLE for the requested range. This happens because the first time we try to write the space cache out we hit an error, and on an error we drop the extent mapping. This is normal for normal files, but the free space cache inode is special. We always expect the extent map to be correct. Thus the second time through we end up with a bogus extent map. Since we're deprecating this feature, the most straightforward way to fix this is to simply skip dropping the extent map range for this failed range. I shortened the test by using error injection to stress the area to make it easier to reproduce. With this patch in place we no longer panic with my error injection test. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 63d61fcd95e3b..f7f4bcc094642 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3364,8 +3364,23 @@ out: unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); - /* Drop extent maps for the part of the extent we didn't write. */ - btrfs_drop_extent_map_range(inode, unwritten_start, end, false); + /* + * Drop extent maps for the part of the extent we didn't write. + * + * We have an exception here for the free_space_inode, this is + * because when we do btrfs_get_extent() on the free space inode + * we will search the commit root. If this is a new block group + * we won't find anything, and we will trip over the assert in + * writepage where we do ASSERT(em->block_start != + * EXTENT_MAP_HOLE). + * + * Theoretically we could also skip this for any NOCOW extent as + * we don't mess with the extent map tree in the NOCOW case, but + * for now simply skip this if we are the free space inode. + */ + if (!btrfs_is_free_space_inode(inode)) + btrfs_drop_extent_map_range(inode, unwritten_start, + end, false); /* * If the ordered extent had an IOERR or something else went -- GitLab From 0f081fcfaac32897a78a7ddb376631d36f774e44 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:33 -0800 Subject: [PATCH 0012/1418] driver core: Fix device_link_flag_is_sync_state_only() commit 7fddac12c38237252431d5b8af7b6d5771b6d125 upstream. device_link_flag_is_sync_state_only() correctly returns true on the flags of an existing device link that only implements sync_state() functionality. However, it incorrectly and confusingly returns false if it's called with DL_FLAG_SYNC_STATE_ONLY. This bug doesn't manifest in any of the existing calls to this function, but fix this confusing behavior to avoid future bugs. Fixes: 67cad5c67019 ("driver core: fw_devlink: Add DL_FLAG_CYCLE support to device links") Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index af90bfb0cc3d8..191590055932f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -337,10 +337,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { - return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED); + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** -- GitLab From 0cf05064008c13c7ff9ffbf4e4735d2c14518a6f Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 29 Jan 2024 20:25:56 +0100 Subject: [PATCH 0013/1418] of: unittest: Fix compile in the non-dynamic case [ Upstream commit 607aad1e4356c210dbef9022955a3089377909b2 ] If CONFIG_OF_KOBJ is not set, a device_node does not contain a kobj and attempts to access the embedded kobj via kref_read break the compile. Replace affected kref_read calls with a macro that reads the refcount if it exists and returns 1 if there is no embedded kobj. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401291740.VP219WIz-lkp@intel.com/ Fixes: 4dde83569832 ("of: Fix double free in of_parse_phandle_with_args_map") Signed-off-by: Christian A. Ehrhardt Link: https://lore.kernel.org/r/20240129192556.403271-1-lk@c--e.de Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index e541a8960f1de..ce1386074e66b 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -49,6 +49,12 @@ static struct unittest_results { failed; \ }) +#ifdef CONFIG_OF_KOBJ +#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref) +#else +#define OF_KREF_READ(NODE) 1 +#endif + /* * Expected message may have a message level other than KERN_INFO. * Print the expected message only if the current loglevel will allow @@ -562,7 +568,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void) pr_err("missing testcase data\n"); return; } - prefs[i] = kref_read(&p[i]->kobj.kref); + prefs[i] = OF_KREF_READ(p[i]); } rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells"); @@ -685,9 +691,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void) unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); for (i = 0; i < ARRAY_SIZE(p); ++i) { - unittest(prefs[i] == kref_read(&p[i]->kobj.kref), + unittest(prefs[i] == OF_KREF_READ(p[i]), "provider%d: expected:%d got:%d\n", - i, prefs[i], kref_read(&p[i]->kobj.kref)); + i, prefs[i], OF_KREF_READ(p[i])); of_node_put(p[i]); } } -- GitLab From b3557a3697aee39bf80b9645cfd2d0ec69ec4e7f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:13 +0800 Subject: [PATCH 0014/1418] KVM: selftests: Clear dirty ring states between two modes in dirty_log_test [ Upstream commit 7167190ddb863bd061c0c6b61f4cec94184b40da ] There are two states, which need to be cleared before next mode is executed. Otherwise, we will hit failure as the following messages indicate. - The variable 'dirty_ring_vcpu_ring_full' shared by main and vcpu thread. It's indicating if the vcpu exit due to full ring buffer. The value can be carried from previous mode (VM_MODE_P40V48_4K) to current one (VM_MODE_P40V48_64K) when VM_MODE_P40V48_16K isn't supported. - The current ring buffer index needs to be reset before next mode (VM_MODE_P40V48_64K) is executed. Otherwise, the stale value is carried from previous mode (VM_MODE_P40V48_4K). # ./dirty_log_test -M dirty-ring Setting log mode to: 'dirty-ring' Test iterations: 32, interval: 10 (ms) Testing guest mode: PA-bits:40, VA-bits:48, 4K pages guest physical test memory offset: 0xffbfffc000 : Dirtied 995328 pages Total bits checked: dirty (1012434), clear (7114123), track_next (966700) Testing guest mode: PA-bits:40, VA-bits:48, 64K pages guest physical test memory offset: 0xffbffc0000 vcpu stops because vcpu is kicked out... vcpu continues now. Notifying vcpu to continue Iteration 1 collected 0 pages vcpu stops because dirty ring is full... vcpu continues now. vcpu stops because dirty ring is full... vcpu continues now. vcpu stops because dirty ring is full... ==== Test Assertion Failure ==== dirty_log_test.c:369: cleared == count pid=10541 tid=10541 errno=22 - Invalid argument 1 0x0000000000403087: dirty_ring_collect_dirty_pages at dirty_log_test.c:369 2 0x0000000000402a0b: log_mode_collect_dirty_pages at dirty_log_test.c:492 3 (inlined by) run_test at dirty_log_test.c:795 4 (inlined by) run_test at dirty_log_test.c:705 5 0x0000000000403a37: for_each_guest_mode at guest_modes.c:100 6 0x0000000000401ccf: main at dirty_log_test.c:938 7 0x0000ffff9ecd279b: ?? ??:0 8 0x0000ffff9ecd286b: ?? ??:0 9 0x0000000000401def: _start at ??:? Reset dirty pages (0) mismatch with collected (35566) Fix the issues by clearing 'dirty_ring_vcpu_ring_full' and the ring buffer index before next new mode is to be executed. Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-7-gshan@redhat.com Stable-dep-of: ba58f873cdee ("KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test") Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/dirty_log_test.c | 27 ++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe15..8758c10ec8503 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -226,13 +226,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -329,10 +331,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,7 +350,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); @@ -406,7 +408,8 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); void (*before_vcpu_join) (void); @@ -471,13 +474,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -696,6 +700,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -771,6 +776,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +784,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why -- GitLab From e5ed6c9225378b7ab6b0e305dd84da7c87219a1b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Feb 2024 15:18:31 -0800 Subject: [PATCH 0015/1418] KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test [ Upstream commit ba58f873cdeec30b6da48e28dd5782c5a3e1371b ] When finishing the final iteration of dirty_log_test testcase, set host_quit _before_ the final "continue" so that the vCPU worker doesn't run an extra iteration, and delete the hack-a-fix of an extra "continue" from the dirty ring testcase. This fixes a bug where the extra post to sem_vcpu_cont may not be consumed, which results in failures in subsequent runs of the testcases. The bug likely was missed during development as x86 supports only a single "guest mode", i.e. there aren't any subsequent testcases after the dirty ring test, because for_each_guest_mode() only runs a single iteration. For the regular dirty log testcases, letting the vCPU run one extra iteration is a non-issue as the vCPU worker waits on sem_vcpu_cont if and only if the worker is explicitly told to stop (vcpu_sync_stop_requested). But for the dirty ring test, which needs to periodically stop the vCPU to reap the dirty ring, letting the vCPU resume the guest _after_ the last iteration means the vCPU will get stuck without an extra "continue". However, blindly firing off an post to sem_vcpu_cont isn't guaranteed to be consumed, e.g. if the vCPU worker sees host_quit==true before resuming the guest. This results in a dangling sem_vcpu_cont, which leads to subsequent iterations getting out of sync, as the vCPU worker will continue on before the main task is ready for it to resume the guest, leading to a variety of asserts, e.g. ==== Test Assertion Failure ==== dirty_log_test.c:384: dirty_ring_vcpu_ring_full pid=14854 tid=14854 errno=22 - Invalid argument 1 0x00000000004033eb: dirty_ring_collect_dirty_pages at dirty_log_test.c:384 2 0x0000000000402d27: log_mode_collect_dirty_pages at dirty_log_test.c:505 3 (inlined by) run_test at dirty_log_test.c:802 4 0x0000000000403dc7: for_each_guest_mode at guest_modes.c:100 5 0x0000000000401dff: main at dirty_log_test.c:941 (discriminator 3) 6 0x0000ffff9be173c7: ?? ??:0 7 0x0000ffff9be1749f: ?? ??:0 8 0x000000000040206f: _start at ??:? Didn't continue vcpu even without ring full Alternatively, the test could simply reset the semaphores before each testcase, but papering over hacks with more hacks usually ends in tears. Reported-by: Shaoqin Huang Fixes: 84292e565951 ("KVM: selftests: Add dirty ring buffer test") Reviewed-by: Peter Xu Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240202231831.354848-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/dirty_log_test.c | 50 +++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 8758c10ec8503..ec40a33c29fda 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -355,7 +355,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -394,12 +397,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -412,7 +409,6 @@ struct log_mode { uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -431,7 +427,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -492,14 +487,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -701,6 +688,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; unsigned long *bmap; uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -772,12 +760,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); while (iteration < p->iterations) { @@ -803,15 +801,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. + */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " -- GitLab From 72d4600a6eb8d93e1e0458ee3988ff58019ac0a2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:17:06 +0300 Subject: [PATCH 0016/1418] wifi: iwlwifi: Fix some error codes [ Upstream commit c6ebb5b67641994de8bc486b33457fe0b681d6fe ] This saves the error as PTR_ERR(wifi_pkg). The problem is that "wifi_pkg" is a valid pointer, not an error pointer. Set the error code to -EINVAL instead. Fixes: 2a8084147bff ("iwlwifi: acpi: support reading and storing WRDS revision 1 and 2") Signed-off-by: Dan Carpenter Link: https://msgid.link/9620bb77-2d7c-4d76-b255-ad824ebf8e35@moroto.mountain Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 585e8cd2d332d..bdb8464cd4321 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -576,7 +576,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -592,7 +592,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -608,7 +608,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -665,7 +665,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -681,7 +681,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -697,7 +697,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } -- GitLab From 4bd106ac1c38eb2eb8693f2ceb7d9517f56aadcf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:17:31 +0300 Subject: [PATCH 0017/1418] wifi: iwlwifi: uninitialized variable in iwl_acpi_get_ppag_table() [ Upstream commit 65c6ee90455053cfd3067c17aaa4a42b0c766543 ] This is an error path and Smatch complains that "tbl_rev" is uninitialized on this path. All the other functions follow this same patter where they set the error code and goto out_free so that's probably what was intended here as well. Fixes: e8e10a37c51c ("iwlwifi: acpi: move ppag code from mvm to fw/acpi") Signed-off-by: Dan Carpenter Link: https://msgid.link/09900c01-6540-4a32-9451-563da0029cb6@moroto.mountain Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index bdb8464cd4321..f5fcc547de391 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -1044,6 +1044,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) goto read_table; } + ret = PTR_ERR(wifi_pkg); + goto out_free; + read_table: fwrt->ppag_ver = tbl_rev; flags = &wifi_pkg->package.elements[1]; -- GitLab From 68614f1865a0dec2b1dbfdeafd575ed8a0378799 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:01 -0800 Subject: [PATCH 0018/1418] of: property: Improve finding the supplier of a remote-endpoint property [ Upstream commit 782bfd03c3ae2c0e6e01b661b8e18f1de50357be ] After commit 4a032827daa8 ("of: property: Simplify of_link_to_phandle()"), remote-endpoint properties created a fwnode link from the consumer device to the supplier endpoint. This is a tiny bit inefficient (not buggy) when trying to create device links or detecting cycles. So, improve this the same way we improved finding the consumer of a remote-endpoint property. Fixes: 4a032827daa8 ("of: property: Simplify of_link_to_phandle()") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-3-saravanak@google.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/property.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index b636777e6f7c8..e1946cc170309 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1261,7 +1261,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) -DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) @@ -1326,6 +1325,17 @@ static struct device_node *parse_interrupts(struct device_node *np, return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } +static struct device_node *parse_remote_endpoint(struct device_node *np, + const char *prop_name, + int index) +{ + /* Return NULL for index > 0 to signify end of remote-endpoints. */ + if (!index || strcmp(prop_name, "remote-endpoint")) + return NULL; + + return of_graph_get_remote_port_parent(np); +} + static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, -- GitLab From 65ded4eb220695909eee657758e824fc30f0b561 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 7 Feb 2024 08:24:15 -0500 Subject: [PATCH 0019/1418] net: openvswitch: limit the number of recursions from action sets [ Upstream commit 6e2f90d31fe09f2b852de25125ca875aabd81367 ] The ovs module allows for some actions to recursively contain an action list for complex scenarios, such as sampling, checking lengths, etc. When these actions are copied into the internal flow table, they are evaluated to validate that such actions make sense, and these calls happen recursively. The ovs-vswitchd userspace won't emit more than 16 recursion levels deep. However, the module has no such limit and will happily accept limits larger than 16 levels nested. Prevent this by tracking the number of recursions happening and manually limiting it to 16 levels nested. The initial implementation of the sample action would track this depth and prevent more than 3 levels of recursion, but this was removed to support the clone use case, rather than limited at the current userspace limit. Fixes: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases") Signed-off-by: Aaron Conole Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240207132416.1488485-2-aconole@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/flow_netlink.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ead5418c126e3..e3c85ceb1f0a5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -47,6 +47,7 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16 static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2543,13 +2544,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2600,7 +2603,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2615,7 +2619,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2658,7 +2663,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2672,7 +2678,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2692,7 +2699,8 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3061,7 +3069,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3109,7 +3117,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3122,7 +3131,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3150,12 +3160,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3350,7 +3364,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3421,7 +3435,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3435,7 +3449,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3445,7 +3460,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3485,7 +3501,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa); -- GitLab From b9357489c46c7a43999964628db8b47d3a1f8672 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 6 Feb 2024 13:30:54 +0100 Subject: [PATCH 0020/1418] lan966x: Fix crash when adding interface under a lag [ Upstream commit 15faa1f67ab405d47789d4702f587ec7df7ef03e ] There is a crash when adding one of the lan966x interfaces under a lag interface. The issue can be reproduced like this: ip link add name bond0 type bond miimon 100 mode balance-xor ip link set dev eth0 master bond0 The reason is because when adding a interface under the lag it would go through all the ports and try to figure out which other ports are under that lag interface. And the issue is that lan966x can have ports that are NULL pointer as they are not probed. So then iterating over these ports it would just crash as they are NULL pointers. The fix consists in actually checking for NULL pointers before accessing something from the ports. Like we do in other places. Fixes: cabc9d49333d ("net: lan966x: Add lag support for lan966x") Signed-off-by: Horatiu Vultur Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206123054.3052966-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan966x/lan966x_lag.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c index 41fa2523d91d3..5f2cd9a8cf8fb 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c @@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) { - struct net_device *bond = lan966x->ports[lag]->bond; + struct lan966x_port *port = lan966x->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!port || !port->bond || (visited & BIT(lag))) continue; + bond = port->bond; bond_mask = lan966x_lag_get_mask(lan966x, bond); for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) { struct lan966x_port *port = lan966x->ports[p]; + if (!port) + continue; + lan_wr(ANA_PGID_PGID_SET(bond_mask), lan966x, ANA_PGID(p)); if (port->lag_tx_active) -- GitLab From 5ad627faed136089e27bcd15e0c33760e575c8c3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Jun 2023 19:19:11 +0100 Subject: [PATCH 0021/1418] tls/sw: Use splice_eof() to flush [ Upstream commit df720d288dbb1793e82b6ccbfc670ec871e9def4 ] Allow splice to end a TLS record after prematurely ending a splice/sendfile due to getting an EOF condition (->splice_read() returned 0) after splice had called TLS with a sendmsg() with MSG_MORE set when the user didn't set MSG_MORE. Suggested-by: Linus Torvalds Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/ Signed-off-by: David Howells Reviewed-by: Jakub Kicinski cc: Chuck Lever cc: Boris Pismenny cc: John Fastabend cc: Jens Axboe cc: Matthew Wilcox Signed-off-by: Jakub Kicinski Stable-dep-of: aec7961916f3 ("tls: fix race between async notify and socket close") Signed-off-by: Sasha Levin --- net/tls/tls.h | 1 + net/tls/tls_main.c | 2 ++ net/tls/tls_sw.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/net/tls/tls.h b/net/tls/tls.h index 0672acab27731..4922668fefaa8 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -97,6 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +void tls_sw_splice_eof(struct socket *sock); int tls_sw_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tls_sw_sendpage(struct sock *sk, struct page *page, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 338a443fa47b2..80b42a3e78830 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -922,6 +922,7 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_BASE] = *base; ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; @@ -990,6 +991,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; + prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0323040d34bc6..fbe6aab5f5b26 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1158,6 +1158,80 @@ send_end: return copied > 0 ? copied : ret; } +/* + * Handle unexpected EOF during splice without SPLICE_F_MORE set. + */ +void tls_sw_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_rec *rec; + struct sk_msg *msg_pl; + ssize_t copied = 0; + bool retrying = false; + int ret = 0; + int pending; + + if (!ctx->open_rec) + return; + + mutex_lock(&tls_ctx->tx_lock); + lock_sock(sk); + +retry: + rec = ctx->open_rec; + if (!rec) + goto unlock; + + msg_pl = &rec->msg_plaintext; + + /* Check the BPF advisor and perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + &copied, 0); + switch (ret) { + case 0: + case -EAGAIN: + if (retrying) + goto unlock; + retrying = true; + goto retry; + case -EINPROGRESS: + break; + default: + goto unlock; + } + + /* Wait for pending encryptions to get completed */ + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + + /* There can be no concurrent accesses, since we have no pending + * encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); + + if (ctx->async_wait.err) + goto unlock; + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, 0); + } + +unlock: + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); +} + static int tls_sw_do_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { -- GitLab From d55eb0b495a9e4de1c61394087bb06f12d18b6bc Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:46 +0200 Subject: [PATCH 0022/1418] tls: extract context alloc/initialization out of tls_set_sw_offload [ Upstream commit 615580cbc99af0da2d1c7226fab43a3d5003eb97 ] Simplify tls_set_sw_offload a bit. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Stable-dep-of: aec7961916f3 ("tls: fix race between async notify and socket close") Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 86 ++++++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fbe6aab5f5b26..47ae429e50e30 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2587,6 +2587,48 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2608,48 +2650,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } -- GitLab From 2c6841c88201e13967583f0f8a9f9b54b9cde404 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:18 -0800 Subject: [PATCH 0023/1418] net: tls: factor out tls_*crypt_async_wait() [ Upstream commit c57ca512f3b68ddcd62bda9cc24a8f5584ab01b1 ] Factor out waiting for async encrypt and decrypt to finish. There are already multiple copies and a subsequent fix will need more. No functional changes. Note that crypto_wait_req() returns wait->err Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Stable-dep-of: aec7961916f3 ("tls: fix race between async notify and socket close") Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 96 +++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 47ae429e50e30..b146be099a3fc 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -229,6 +229,20 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) spin_unlock_bh(&ctx->decrypt_compl_lock); } +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->decrypt_compl_lock); + reinit_completion(&ctx->async_wait.completion); + pending = atomic_read(&ctx->decrypt_pending); + spin_unlock_bh(&ctx->decrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + + return ctx->async_wait.err; +} + static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -496,6 +510,28 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + + /* There can be no concurrent accesses, since we have no + * pending encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -953,7 +989,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT)) @@ -1122,24 +1157,12 @@ trim_sgl: if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1171,7 +1194,6 @@ void tls_sw_splice_eof(struct socket *sock) ssize_t copied = 0; bool retrying = false; int ret = 0; - int pending; if (!ctx->open_rec) return; @@ -1203,22 +1225,7 @@ retry: } /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no pending - * encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) + if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ @@ -2197,16 +2204,10 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2425,16 +2426,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); -- GitLab From 7a3ca06d04d589deec81f56229a9a9d62352ce01 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:19 -0800 Subject: [PATCH 0024/1418] tls: fix race between async notify and socket close [ Upstream commit aec7961916f3f9e88766e2688992da6980f11b8d ] The submitting thread (one which called recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete() so any code past that point risks touching already freed data. Try to avoid the locking and extra flags altogether. Have the main thread hold an extra reference, this way we can depend solely on the atomic ref counter for synchronization. Don't futz with reiniting the completion, either, we are now tightly controlling when completion fires. Reported-by: valis Fixes: 0cada33241d9 ("net/tls: fix race condition causing kernel panic") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tls.h | 5 ----- net/tls/tls_sw.c | 43 ++++++++++--------------------------------- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index c36bf4c50027e..899c863aba02c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -108,9 +108,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -147,8 +144,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b146be099a3fc..ee11932237c07 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -223,22 +223,15 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); } static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) { - int pending; - - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->decrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); return ctx->async_wait.err; } @@ -266,6 +259,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -455,7 +449,6 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct tls_rec *rec; bool ready = false; struct sock *sk; - int pending; rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -495,12 +488,8 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -512,22 +501,9 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) { - int pending; - - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + atomic_inc(&ctx->encrypt_pending); return ctx->async_wait.err; } @@ -578,6 +554,7 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); @@ -2594,7 +2571,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc } crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); INIT_LIST_HEAD(&sw_ctx_tx->tx_list); INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); sw_ctx_tx->tx_work.sk = sk; @@ -2615,7 +2592,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) } crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); init_waitqueue_head(&sw_ctx_rx->wq); skb_queue_head_init(&sw_ctx_rx->rx_list); skb_queue_head_init(&sw_ctx_rx->async_hold); -- GitLab From 20b4ed034872b4d024b26e2bc1092c3f80e5db96 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 6 Feb 2024 17:18:22 -0800 Subject: [PATCH 0025/1418] net: tls: fix use-after-free with partial reads and async decrypt [ Upstream commit 32b55c5ff9103b8508c1e04bfa5a08c64e7a925f ] tls_decrypt_sg doesn't take a reference on the pages from clear_skb, so the put_page() in tls_decrypt_done releases them, and we trigger a use-after-free in process_rx_list when we try to read from the partially-read skb. Fixes: fd31f3996af2 ("tls: rx: decrypt into a fresh skb") Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index ee11932237c07..d651c50746a8b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -62,6 +62,7 @@ struct tls_decrypt_ctx { u8 iv[MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -186,7 +187,6 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct aead_request *aead_req = crypto_get_completion_data(data); struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -212,7 +212,7 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -1653,6 +1653,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, -- GitLab From 727cdd2f3dca2dd0528e7b2b711cb8529b1bea20 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:24 -0800 Subject: [PATCH 0026/1418] net: tls: fix returned read length with async decrypt [ Upstream commit ac437a51ce662364062f704e321227f6728e6adc ] We double count async, non-zc rx data. The previous fix was lucky because if we fully zc async_copy_bytes is 0 so we add 0. Decrypted already has all the bytes we handled, in all cases. We don't have to adjust anything, delete the erroneous line. Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d651c50746a8b..09d258bb2df75 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2202,7 +2202,6 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - decrypted += max(err, 0); } copied += decrypted; -- GitLab From 251145e504370d1f9db0c8aa70a5b898d2f5ba56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 10 Feb 2024 17:40:08 +0100 Subject: [PATCH 0027/1418] spi: ppc4xx: Drop write-only variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b3aa619a8b4706f35cb62f780c14e68796b37f3f ] Since commit 24778be20f87 ("spi: convert drivers to use bits_per_word_mask") the bits_per_word variable is only written to. The check that was there before isn't needed any more as the spi core ensures that only 8 bit transfers are used, so the variable can go away together with all assignments to it. Fixes: 24778be20f87 ("spi: convert drivers to use bits_per_word_mask") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240210164006.208149-8-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-ppc4xx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index d65f047b6c823..1179a1115137f 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -166,10 +166,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) int scr; u8 cdm = 0; u32 speed; - u8 bits_per_word; /* Start with the generic configuration for this device. */ - bits_per_word = spi->bits_per_word; speed = spi->max_speed_hz; /* @@ -177,9 +175,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) * the transfer to overwrite the generic configuration with zeros. */ if (t) { - if (t->bits_per_word) - bits_per_word = t->bits_per_word; - if (t->speed_hz) speed = min(t->speed_hz, spi->max_speed_hz); } -- GitLab From 1f0d7792e9023e8658e901b7b76a555f6aa052ec Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 11 Feb 2024 12:58:34 +0300 Subject: [PATCH 0028/1418] ASoC: rt5645: Fix deadlock in rt5645_jack_detect_work() [ Upstream commit 6ef5d5b92f7117b324efaac72b3db27ae8bb3082 ] There is a path in rt5645_jack_detect_work(), where rt5645->jd_mutex is left locked forever. That may lead to deadlock when rt5645_jack_detect_work() is called for the second time. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: cdba4301adda ("ASoC: rt5650: add mutex to avoid the jack detection failure") Signed-off-by: Alexey Khoroshilov Link: https://lore.kernel.org/r/1707645514-21196-1-git-send-email-khoroshilov@ispras.ru Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5645.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index fd3dca08460ba..844d14d4c9a51 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3288,6 +3288,7 @@ static void rt5645_jack_detect_work(struct work_struct *work) report, SND_JACK_HEADPHONE); snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE); + mutex_unlock(&rt5645->jd_mutex); return; case 4: val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020; -- GitLab From 20f378f92971f4d48bba59518d02911d63f23820 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 9 Feb 2024 01:55:18 -0800 Subject: [PATCH 0029/1418] net: sysfs: Fix /sys/class/net/ path for statistics [ Upstream commit 5b3fbd61b9d1f4ed2db95aaf03f9adae0373784d ] The Documentation/ABI/testing/sysfs-class-net-statistics documentation is pointing to the wrong path for the interface. Documentation is pointing to /sys/class/, instead of /sys/class/net/. Fix it by adding the `net/` directory before the interface. Fixes: 6044f9700645 ("net: sysfs: document /sys/class/net/statistics/*") Signed-off-by: Breno Leitao Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ABI/testing/sysfs-class-net-statistics | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics index 55db27815361b..53e508c6936a5 100644 --- a/Documentation/ABI/testing/sysfs-class-net-statistics +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -1,4 +1,4 @@ -What: /sys/class//statistics/collisions +What: /sys/class/net//statistics/collisions Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -6,7 +6,7 @@ Description: Indicates the number of collisions seen by this network device. This value might not be relevant with all MAC layers. -What: /sys/class//statistics/multicast +What: /sys/class/net//statistics/multicast Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -14,7 +14,7 @@ Description: Indicates the number of multicast packets received by this network device. -What: /sys/class//statistics/rx_bytes +What: /sys/class/net//statistics/rx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -23,7 +23,7 @@ Description: See the network driver for the exact meaning of when this value is incremented. -What: /sys/class//statistics/rx_compressed +What: /sys/class/net//statistics/rx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: network device. This value might only be relevant for interfaces that support packet compression (e.g: PPP). -What: /sys/class//statistics/rx_crc_errors +What: /sys/class/net//statistics/rx_crc_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -41,7 +41,7 @@ Description: by this network device. Note that the specific meaning might depend on the MAC layer used by the interface. -What: /sys/class//statistics/rx_dropped +What: /sys/class/net//statistics/rx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -51,7 +51,7 @@ Description: packet processing. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_errors +What: /sys/class/net//statistics/rx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -59,7 +59,7 @@ Description: Indicates the number of receive errors on this network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_fifo_errors +What: /sys/class/net//statistics/rx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -68,7 +68,7 @@ Description: network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_frame_errors +What: /sys/class/net//statistics/rx_frame_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -78,7 +78,7 @@ Description: on the MAC layer protocol used. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_length_errors +What: /sys/class/net//statistics/rx_length_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -87,7 +87,7 @@ Description: error, oversized or undersized. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_missed_errors +What: /sys/class/net//statistics/rx_missed_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -96,7 +96,7 @@ Description: due to lack of capacity in the receive side. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_nohandler +What: /sys/class/net//statistics/rx_nohandler Date: February 2016 KernelVersion: 4.6 Contact: netdev@vger.kernel.org @@ -104,7 +104,7 @@ Description: Indicates the number of received packets that were dropped on an inactive device by the network core. -What: /sys/class//statistics/rx_over_errors +What: /sys/class/net//statistics/rx_over_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -114,7 +114,7 @@ Description: (e.g: larger than MTU). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_packets +What: /sys/class/net//statistics/rx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -122,7 +122,7 @@ Description: Indicates the total number of good packets received by this network device. -What: /sys/class//statistics/tx_aborted_errors +What: /sys/class/net//statistics/tx_aborted_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -132,7 +132,7 @@ Description: a medium collision). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_bytes +What: /sys/class/net//statistics/tx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -143,7 +143,7 @@ Description: transmitted packets or all packets that have been queued for transmission. -What: /sys/class//statistics/tx_carrier_errors +What: /sys/class/net//statistics/tx_carrier_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -152,7 +152,7 @@ Description: because of carrier errors (e.g: physical link down). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_compressed +What: /sys/class/net//statistics/tx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -161,7 +161,7 @@ Description: this might only be relevant for devices that support compression (e.g: PPP). -What: /sys/class//statistics/tx_dropped +What: /sys/class/net//statistics/tx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -170,7 +170,7 @@ Description: See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_errors +What: /sys/class/net//statistics/tx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -179,7 +179,7 @@ Description: a network device. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_fifo_errors +What: /sys/class/net//statistics/tx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -188,7 +188,7 @@ Description: FIFO error. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_heartbeat_errors +What: /sys/class/net//statistics/tx_heartbeat_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -197,7 +197,7 @@ Description: reported as heartbeat errors. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_packets +What: /sys/class/net//statistics/tx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -206,7 +206,7 @@ Description: device. See the driver for whether this reports the number of all attempted or successful transmissions. -What: /sys/class//statistics/tx_window_errors +What: /sys/class/net//statistics/tx_window_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org -- GitLab From 4b02c89327f7b58b453c339e2743706a871b87e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:22:17 +0100 Subject: [PATCH 0030/1418] nouveau/svm: fix kvcalloc() argument order [ Upstream commit 2c80a2b715df75881359d07dbaacff8ad411f40e ] The conversion to kvcalloc() mixed up the object size and count arguments, causing a warning: drivers/gpu/drm/nouveau/nouveau_svm.c: In function 'nouveau_svm_fault_buffer_ctor': drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: error: 'kvcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 1010 | buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); | ^ drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: note: earlier argument should specify number of elements, later size of each element The behavior is still correct aside from the warning, but fixing it avoids the warnings and can help the compiler track the individual objects better. Fixes: 71e4bbca070e ("nouveau/svm: Use kvcalloc() instead of kvzalloc()") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240212112230.1117284-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 31a5b81ee9fc4..be6674fb1af71 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -997,7 +997,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; -- GitLab From c638b4afc750a0c7087b51065e2461d81b02049f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 11 Feb 2024 08:08:37 -0800 Subject: [PATCH 0031/1418] MIPS: Add 'memory' clobber to csum_ipv6_magic() inline assembler [ Upstream commit d55347bfe4e66dce2e1e7501e5492f4af3e315f8 ] After 'lib: checksum: Use aligned accesses for ip_fast_csum and csum_ipv6_magic tests' was applied, the test_csum_ipv6_magic unit test started failing for all mips platforms, both little and bit endian. Oddly enough, adding debug code into test_csum_ipv6_magic() made the problem disappear. The gcc manual says: "The "memory" clobber tells the compiler that the assembly code performs memory reads or writes to items other than those listed in the input and output operands (for example, accessing the memory pointed to by one of the input parameters) " This is definitely the case for csum_ipv6_magic(). Indeed, adding the 'memory' clobber fixes the problem. Cc: Charlie Jenkins Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guenter Roeck Reviewed-by: Charlie Jenkins Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/checksum.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index 4044eaf989ac7..0921ddda11a4b 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " .set pop" : "=&r" (sum), "=&r" (tmp) : "r" (saddr), "r" (daddr), - "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)); + "0" (htonl(len)), "r" (htonl(proto)), "r" (sum) + : "memory"); return csum_fold(sum); } -- GitLab From 1c981792e4ccbc134b468797acdd7781959e6893 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 8 Feb 2024 10:03:33 -0800 Subject: [PATCH 0032/1418] i40e: Do not allow untrusted VF to remove administratively set MAC [ Upstream commit 73d9629e1c8c1982f13688c4d1019c3994647ccc ] Currently when PF administratively sets VF's MAC address and the VF is put down (VF tries to delete all MACs) then the MAC is removed from MAC filters and primary VF MAC is zeroed. Do not allow untrusted VF to remove primary MAC when it was set administratively by PF. Reproducer: 1) Create VF 2) Set VF interface up 3) Administratively set the VF's MAC 4) Put VF interface down [root@host ~]# echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs [root@host ~]# ip link set enp2s0f0v0 up [root@host ~]# ip link set enp2s0f0 vf 0 mac fe:6c:b5:da:c7:7d [root@host ~]# ip link show enp2s0f0 23: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 3c:ec:ef:b7:dd:04 brd ff:ff:ff:ff:ff:ff vf 0 link/ether fe:6c:b5:da:c7:7d brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off [root@host ~]# ip link set enp2s0f0v0 down [root@host ~]# ip link show enp2s0f0 23: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 3c:ec:ef:b7:dd:04 brd ff:ff:ff:ff:ff:ff vf 0 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off Fixes: 700bbf6c1f9e ("i40e: allow VF to remove any MAC filter") Fixes: ceb29474bbbc ("i40e: Add support for VF to specify its primary MAC address") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240208180335.1844996-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3d3db58090ed1..ed4be80fec2a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2846,6 +2846,24 @@ error_param: (u8 *)&stats, sizeof(stats)); } +/** + * i40e_can_vf_change_mac + * @vf: pointer to the VF info + * + * Return true if the VF is allowed to change its MAC filters, false otherwise + */ +static bool i40e_can_vf_change_mac(struct i40e_vf *vf) +{ + /* If the VF MAC address has been set administratively (via the + * ndo_set_vf_mac command), then deny permission to the VF to + * add/delete unicast MAC addresses, unless the VF is trusted + */ + if (vf->pf_set_mac && !vf->trusted) + return false; + + return true; +} + #define I40E_MAX_MACVLAN_PER_HW 3072 #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \ (num_ports)) @@ -2905,8 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - !is_multicast_ether_addr(addr) && vf->pf_set_mac && + if (!i40e_can_vf_change_mac(vf) && + !is_multicast_ether_addr(addr) && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); @@ -3049,19 +3067,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } - if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr)) - was_unimac_deleted = true; } vsi = pf->vsi[vf->lan_vsi_idx]; spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ - for (i = 0; i < al->num_elements; i++) + for (i = 0; i < al->num_elements; i++) { + const u8 *addr = al->list[i].addr; + + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ + if (ether_addr_equal(addr, vf->default_lan_addr.addr) && + i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } + } spin_unlock_bh(&vsi->mac_filter_hash_lock); -- GitLab From 7a245b8a2fa96b7cc202df4a7cf6303486e18790 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Nov 2023 17:01:03 +0100 Subject: [PATCH 0033/1418] i40e: Fix waiting for queues of all VSIs to be disabled [ Upstream commit c73729b64bb692186da080602cd13612783f52ac ] The function i40e_pf_wait_queues_disabled() iterates all PF's VSIs up to 'pf->hw.func_caps.num_vsis' but this is incorrect because the real number of VSIs can be up to 'pf->num_alloc_vsi' that can be higher. Fix this loop. Fixes: 69129dc39fac ("i40e: Modify Tx disable wait flow in case of DCB reconfiguration") Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Reviewed-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 63d43ef86f9b9..76455405a6d8e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5333,7 +5333,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; - for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { + for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) { ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) -- GitLab From 76a42074d0b8b342f4c1abb0553a64271c5d2bb2 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 22 Jan 2024 09:52:01 -0800 Subject: [PATCH 0034/1418] scs: add CONFIG_MMU dependency for vfree_atomic() commit 6f9dc684cae638dda0570154509884ee78d0f75c upstream. The shadow call stack implementation fails to build without CONFIG_MMU: ld.lld: error: undefined symbol: vfree_atomic >>> referenced by scs.c >>> kernel/scs.o:(scs_free) in archive vmlinux.a Link: https://lkml.kernel.org/r/20240122175204.2371009-1-samuel.holland@sifive.com Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Cc: Will Deacon Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index 14273a6203dfc..f99fd9a4ca778 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -642,6 +642,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from -- GitLab From 36be97e9eb535fe3008a5cb040b1e56f29f2e398 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 26 Jan 2024 09:42:58 +0900 Subject: [PATCH 0035/1418] tracing/trigger: Fix to return error if failed to alloc snapshot commit 0958b33ef5a04ed91f61cef4760ac412080c4e08 upstream. Fix register_snapshot_trigger() to return error code if it failed to allocate a snapshot instead of 0 (success). Unless that, it will register snapshot trigger without an error. Link: https://lore.kernel.org/linux-trace-kernel/170622977792.270660.2789298642759362200.stgit@devnote2 Fixes: 0bbe7f719985 ("tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation") Cc: stable@vger.kernel.org Cc: Vincent Donnefort Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 918730d749325..f941ce01ee351 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1455,8 +1455,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } -- GitLab From 16b1025eaa8fc223ab4273ece20d1c3a4211a95d Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 18 Jan 2024 10:19:53 -0800 Subject: [PATCH 0036/1418] mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again commit 9319b647902cbd5cc884ac08a8a6d54ce111fc78 upstream. (struct dirty_throttle_control *)->thresh is an unsigned long, but is passed as the u32 divisor argument to div_u64(). On architectures where unsigned long is 64 bytes, the argument will be implicitly truncated. Use div64_u64() instead of div_u64() so that the value used in the "is this a safe division" check is the same as the divisor. Also, remove redundant cast of the numerator to u64, as that should happen implicitly. This would be difficult to exploit in memcg domain, given the ratio-based arithmetic domain_drity_limits() uses, but is much easier in global writeback domain with a BDI_CAP_STRICTLIMIT-backing device, using e.g. vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32) Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()") Signed-off-by: Zach O'Keefe Cc: Maxim Patlasov Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index de5f69921b946..d3e9d12860b9f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1526,7 +1526,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need -- GitLab From 221da504a55b15c5b4eb7363b53d88d41163fe00 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 09:09:56 -0800 Subject: [PATCH 0037/1418] scsi: storvsc: Fix ring buffer size calculation commit f4469f3858352ad1197434557150b1f7086762a0 upstream. Current code uses the specified ring buffer size (either the default of 128 Kbytes or a module parameter specified value) to encompass the one page ring buffer header plus the actual ring itself. When the page size is 4K, carving off one page for the header isn't significant. But when the page size is 64K on ARM64, only half of the default 128 Kbytes is left for the actual ring. While this doesn't break anything, the smaller ring size could be a performance bottleneck. Fix this by applying the VMBUS_RING_SIZE macro to the specified ring buffer size. This macro adds a page for the header, and rounds up the size to a page boundary, using the page size for which the kernel is built. Use this new size for subsequent ring buffer calculations. For example, on ARM64 with 64K page size and the default ring size, this results in the actual ring being 128 Kbytes, which is intended. Cc: stable@vger.kernel.org # 5.15.x Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122170956.496436-1-mhklinux@outlook.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c2d981d5a2dd5..4fad9d85bd6f9 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -326,6 +326,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -683,8 +684,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1964,7 +1965,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2157,7 +2158,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2191,8 +2192,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); -- GitLab From 30884a44e0cedc3dfda8c22432f3ba4078ec2d94 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 31 Jan 2024 21:57:27 +0100 Subject: [PATCH 0038/1418] dm-crypt, dm-verity: disable tasklets commit 0a9bab391e336489169b95cb0d4553d921302189 upstream. Tasklets have an inherent problem with memory corruption. The function tasklet_action_common calls tasklet_trylock, then it calls the tasklet callback and then it calls tasklet_unlock. If the tasklet callback frees the structure that contains the tasklet or if it calls some code that may free it, tasklet_unlock will write into free memory. The commits 8e14f610159d and d9a02e016aaf try to fix it for dm-crypt, but it is not a sufficient fix and the data corruption can still happen [1]. There is no fix for dm-verity and dm-verity will write into free memory with every tasklet-processed bio. There will be atomic workqueues implemented in the kernel 6.9 [2]. They will have better interface and they will not suffer from the memory corruption problem. But we need something that stops the memory corruption now and that can be backported to the stable kernels. So, I'm proposing this commit that disables tasklets in both dm-crypt and dm-verity. This commit doesn't remove the tasklet support, because the tasklet code will be reused when atomic workqueues will be implemented. [1] https://lore.kernel.org/all/d390d7ee-f142-44d3-822a-87949e14608b@suse.de/T/ [2] https://lore.kernel.org/lkml/20240130091300.2968534-1-tj@kernel.org/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: 39d42fa96ba1b ("dm crypt: add flags to optionally bypass kcryptd workqueues") Fixes: 5721d4e5a9cdb ("dm verity: Add optional "try_verify_in_tasklet" feature") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 37 ++--------------------------------- drivers/md/dm-verity-target.c | 26 ++---------------------- drivers/md/dm-verity.h | 1 - 3 files changed, 4 insertions(+), 60 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ff515437d81e7..0e6068ee783e7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -72,10 +72,8 @@ struct dm_crypt_io { struct bio *base_bio; u8 *integrity_metadata; bool integrity_metadata_from_pool:1; - bool in_tasklet:1; struct work_struct work; - struct tasklet_struct tasklet; struct convert_context ctx; @@ -1729,7 +1727,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; - io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1738,12 +1735,6 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } -static void kcryptd_io_bio_endio(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - bio_endio(io->base_bio); -} - /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1767,20 +1758,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) base_bio->bi_status = error; - /* - * If we are running this function from our tasklet, - * we can't call bio_endio() here, because it will call - * clone_endio() from dm.c, which in turn will - * free the current struct dm_crypt_io structure with - * our tasklet. In this case we need to delay bio_endio() - * execution to after the tasklet is done and dequeued. - */ - if (io->in_tasklet) { - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); - return; - } - bio_endio(base_bio); } @@ -2213,11 +2190,6 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } -static void kcryptd_crypt_tasklet(unsigned long work) -{ - kcryptd_crypt((struct work_struct *)work); -} - static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2229,15 +2201,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_hardirq() || irqs_disabled()) { - io->in_tasklet = true; - tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); - tasklet_schedule(&io->tasklet); + if (!(in_hardirq() || irqs_disabled())) { + kcryptd_crypt(&io->work); return; } - - kcryptd_crypt(&io->work); - return; } INIT_WORK(&io->work, kcryptd_crypt); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 24df610a2c438..4669923f4cfb4 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -634,23 +634,6 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } -static void verity_tasklet(unsigned long data) -{ - struct dm_verity_io *io = (struct dm_verity_io *)data; - int err; - - io->in_tasklet = true; - err = verity_verify_io(io); - if (err == -EAGAIN || err == -ENOMEM) { - /* fallback to retrying with work-queue */ - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - return; - } - - verity_finish_io(io, errno_to_blk_status(err)); -} - static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; @@ -663,13 +646,8 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { - tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); - tasklet_schedule(&io->tasklet); - } else { - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - } + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); } /* diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f9d522c870e61..f3f6070084196 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -83,7 +83,6 @@ struct dm_verity_io { struct bvec_iter iter; struct work_struct work; - struct tasklet_struct tasklet; /* * Three variably-size fields follow this struct: -- GitLab From 607385d75a0bb229998528078b563572a56db81e Mon Sep 17 00:00:00 2001 From: Techno Mooney Date: Mon, 29 Jan 2024 15:11:47 +0700 Subject: [PATCH 0039/1418] ASoC: amd: yc: Add DMI quirk for MSI Bravo 15 C7VF commit c6dce23ec993f7da7790a9eadb36864ceb60e942 upstream. The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Techno Mooney Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218402 Cc: stable@vger.kernel.org Signed-off-by: Techno Mooney Signed-off-by: Bagas Sanjaya Link: https://msgid.link/r/20240129081148.1044891-1-bagasdotme@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 808d002826233..3b43595aa87a7 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -297,6 +297,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 15 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From f00e8d0fccf64c3646bae71a58764f4ee90ac0f5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Jan 2024 21:16:39 +0100 Subject: [PATCH 0040/1418] parisc: Prevent hung tasks when printing inventory on serial console commit c8708d758e715c3824a73bf0cda97292b52be44d upstream. Printing the inventory on a serial console can be quite slow and thus may trigger the hung task detector (CONFIG_DETECT_HUNG_TASK=y) and possibly reboot the machine. Adding a cond_resched() prevents this. Signed-off-by: Helge Deller Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/drivers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 8f12b9f318ae6..a582928739dd5 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1003,6 +1003,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); -- GitLab From b23c431e505207c7f201d57fc4e041a47a96224f Mon Sep 17 00:00:00 2001 From: David Senoner Date: Fri, 26 Jan 2024 16:56:26 +0100 Subject: [PATCH 0041/1418] ALSA: hda/realtek: Fix the external mic not being recognised for Acer Swift 1 SF114-32 commit efb56d84dd9c3de3c99fc396abb57c6d330038b5 upstream. If you connect an external headset/microphone to the 3.5mm jack on the Acer Swift 1 SF114-32 it does not recognize the microphone. This fixes that and gives the user the ability to choose between internal and headset mic. Signed-off-by: David Senoner Cc: Link: https://lore.kernel.org/r/20240126155626.2304465-1-seda18@rolmail.net Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 62f2137044923..ef060f2c25438 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9431,6 +9431,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), -- GitLab From ea09996b3711d9f7bb9597c7bdc95f28475d2f8b Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Sun, 28 Jan 2024 16:57:04 +0100 Subject: [PATCH 0042/1418] ALSA: hda/realtek: Enable Mute LED on HP Laptop 14-fq0xxx commit f0d78972f27dc1d1d51fbace2713ad3cdc60a877 upstream. This HP Laptop uses ALC236 codec with COEF 0x07 controlling the mute LED. Enable existing quirk for this device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20240128155704.2333812-1-l.guzenko@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ef060f2c25438..274f528eba0ba 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9618,6 +9618,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), -- GitLab From d7d7a0e3b6f5adc45f23667cbb919e99093a5b5c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jan 2024 18:09:01 +0100 Subject: [PATCH 0043/1418] HID: i2c-hid-of: fix NULL-deref on failed power up commit 00aab7dcb2267f2aef59447602f34501efe1a07f upstream. A while back the I2C HID implementation was split in an ACPI and OF part, but the new OF driver never initialises the client pointer which is dereferenced on power-up failures. Fixes: b33752c30023 ("HID: i2c-hid: Reorganize so ACPI and OF are separate modules") Cc: stable@vger.kernel.org # 5.12 Cc: Douglas Anderson Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index 97a27a803f58d..6feb812fce375 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -80,6 +80,7 @@ static int i2c_hid_of_probe(struct i2c_client *client, if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; -- GitLab From b71a906a72ddebb511891c76fb7d2b0cdcf85f79 Mon Sep 17 00:00:00 2001 From: Tatsunosuke Tobita Date: Thu, 1 Feb 2024 13:40:55 +0900 Subject: [PATCH 0044/1418] HID: wacom: generic: Avoid reporting a serial of '0' to userspace commit ab41a31dd5e2681803642b6d08590b61867840ec upstream. The xf86-input-wacom driver does not treat '0' as a valid serial number and will drop any input report which contains an MSC_SERIAL = 0 event. The kernel driver already takes care to avoid sending any MSC_SERIAL event if the value of serial[0] == 0 (which is the case for devices that don't actually report a serial number), but this is not quite sufficient. Only the lower 32 bits of the serial get reported to userspace, so if this portion of the serial is zero then there can still be problems. This commit allows the driver to report either the lower 32 bits if they are non-zero or the upper 32 bits otherwise. Signed-off-by: Jason Gerecke Signed-off-by: Tatsunosuke Tobita Fixes: f85c9dc678a5 ("HID: wacom: generic: Support tool ID and additional tool types") CC: stable@vger.kernel.org # v4.10 Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 165ed872fa4e7..53235b276bb24 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2571,7 +2571,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev, wacom_wac->hid_data.tipswitch); input_report_key(input, wacom_wac->tool[0], sense); if (wacom_wac->serial[0]) { - input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]); + /* + * xf86-input-wacom does not accept a serial number + * of '0'. Report the low 32 bits if possible, but + * if they are zero, report the upper ones instead. + */ + __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu; + __u32 serial_hi = wacom_wac->serial[0] >> 32; + input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi)); input_report_abs(input, ABS_MISC, sense ? id : 0); } -- GitLab From e5c6c8ef3e4d3a88407404daafbf2c6be62f2185 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 29 Jan 2024 14:35:45 -0800 Subject: [PATCH 0045/1418] HID: wacom: Do not register input devices until after hid_hw_start commit c1d6708bf0d3dd976460d435373cf5abf21ce258 upstream. If a input device is opened before hid_hw_start is called, events may not be received from the hardware. In the case of USB-backed devices, for example, the hid_hw_start function is responsible for filling in the URB which is submitted when the input device is opened. If a device is opened prematurely, polling will never start because the device will not have been in the correct state to send the URB. Because the wacom driver registers its input devices before calling hid_hw_start, there is a window of time where a device can be opened and end up in an inoperable state. Some ARM-based Chromebooks in particular reliably trigger this bug. This commit splits the wacom_register_inputs function into two pieces. One which is responsible for setting up the allocated inputs (and runs prior to hid_hw_start so that devices are ready for any input events they may end up receiving) and another which only registers the devices (and runs after hid_hw_start to ensure devices can be immediately opened without issue). Note that the functions to initialize the LEDs and remotes are also moved after hid_hw_start to maintain their own dependency chains. Fixes: 7704ac937345 ("HID: wacom: implement generic HID handling for pen generic devices") Cc: stable@vger.kernel.org # v3.18+ Suggested-by: Dmitry Torokhov Signed-off-by: Jason Gerecke Tested-by: Dmitry Torokhov Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_sys.c | 63 ++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index af163e8dfec07..12d4c28741d7e 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2080,7 +2080,7 @@ static int wacom_allocate_inputs(struct wacom *wacom) return 0; } -static int wacom_register_inputs(struct wacom *wacom) +static int wacom_setup_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); @@ -2099,10 +2099,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pen_input_dev); wacom_wac->pen_input = NULL; pen_input_dev = NULL; - } else { - error = input_register_device(pen_input_dev); - if (error) - goto fail; } error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac); @@ -2111,10 +2107,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(touch_input_dev); wacom_wac->touch_input = NULL; touch_input_dev = NULL; - } else { - error = input_register_device(touch_input_dev); - if (error) - goto fail; } error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); @@ -2123,7 +2115,34 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; - } else { + } + + return 0; +} + +static int wacom_register_inputs(struct wacom *wacom) +{ + struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; + struct wacom_wac *wacom_wac = &(wacom->wacom_wac); + int error = 0; + + pen_input_dev = wacom_wac->pen_input; + touch_input_dev = wacom_wac->touch_input; + pad_input_dev = wacom_wac->pad_input; + + if (pen_input_dev) { + error = input_register_device(pen_input_dev); + if (error) + goto fail; + } + + if (touch_input_dev) { + error = input_register_device(touch_input_dev); + if (error) + goto fail; + } + + if (pad_input_dev) { error = input_register_device(pad_input_dev); if (error) goto fail; @@ -2379,6 +2398,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } + error = wacom_setup_inputs(wacom); + if (error) + goto fail; + + if (features->type == HID_GENERIC) + connect_mask |= HID_CONNECT_DRIVER; + + /* Regular HID work starts now */ + error = hid_hw_start(hdev, connect_mask); + if (error) { + hid_err(hdev, "hw start failed\n"); + goto fail; + } + error = wacom_register_inputs(wacom); if (error) goto fail; @@ -2393,16 +2426,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } - if (features->type == HID_GENERIC) - connect_mask |= HID_CONNECT_DRIVER; - - /* Regular HID work starts now */ - error = hid_hw_start(hdev, connect_mask); - if (error) { - hid_err(hdev, "hw start failed\n"); - goto fail; - } - if (!wireless) { /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(wacom); -- GitLab From 711beb8acf5d007302d167c4aa63cb5ff517cea9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 4 Feb 2024 04:56:17 -0800 Subject: [PATCH 0046/1418] iio: hid-sensor-als: Return 0 for HID_USAGE_SENSOR_TIME_TIMESTAMP commit 621c6257128149e45b36ffb973a01c3f3461b893 upstream. When als_capture_sample() is called with usage ID HID_USAGE_SENSOR_TIME_TIMESTAMP, return 0. The HID sensor core ignores the return value for capture_sample() callback, so return value doesn't make difference. But correct the return value to return success instead of -EINVAL. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240204125617.2635574-1-srinivas.pandruvada@linux.intel.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/hid-sensor-als.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 5a1a625d8d16d..85097b769c209 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -228,6 +228,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; -- GitLab From 3fd6c16ec78da4349de068529e2a175723dbdd0a Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:21 +0100 Subject: [PATCH 0047/1418] usb: ucsi: Add missing ppm_lock commit c9aed03a0a683fd1600ea92f2ad32232d4736272 upstream. Calling ->sync_write must be done while holding the PPM lock as the mailbox logic does not support concurrent commands. At least since the addition of partner task this means that ucsi_acknowledge_connector_change should be called with the PPM lock held as it calls ->sync_write. Thus protect the only call to ucsi_acknowledge_connector_change with the PPM. All other calls to ->sync_write already happen under the PPM lock. Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-2-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dc2dea3768fb6..0695ee54ff781 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -831,7 +831,9 @@ static void ucsi_handle_connector_change(struct work_struct *work) clear_bit(EVENT_PENDING, &con->ucsi->flags); + mutex_lock(&ucsi->ppm_lock); ret = ucsi_acknowledge_connector_change(ucsi); + mutex_unlock(&ucsi->ppm_lock); if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); -- GitLab From d31b886ed6a5095214062ee4fb55037eb930adb6 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 26 Jan 2024 17:38:00 -0500 Subject: [PATCH 0048/1418] usb: ulpi: Fix debugfs directory leak commit 3caf2b2ad7334ef35f55b95f3e1b138c6f77b368 upstream. The ULPI per-device debugfs root is named after the ulpi device's parent, but ulpi_unregister_interface tries to remove a debugfs directory named after the ulpi device itself. This results in the directory sticking around and preventing subsequent (deferred) probes from succeeding. Change the directory name to match the ulpi device. Fixes: bd0a0a024f2a ("usb: ulpi: Add debugfs support") Cc: stable@vger.kernel.org Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240126223800.2864613-1-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 38703781ee2d1..1283c427cdf88 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", -- GitLab From 9f754d009483791c14eef98095369a3a4ea66cac Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:22 +0100 Subject: [PATCH 0049/1418] usb: ucsi_acpi: Fix command completion handling commit 2840143e393a4ddc1caab4372969ea337371168c upstream. In case of a spurious or otherwise delayed notification it is possible that CCI still reports the previous completion. The UCSI spec is aware of this and provides two completion bits in CCI, one for normal commands and one for acks. As acks and commands alternate the notification handler can determine if the completion bit is from the current command. The initial UCSI code correctly handled this but the distinction between the two completion bits was lost with the introduction of the new API. To fix this revive the ACK_PENDING bit for ucsi_acpi and only complete commands if the completion bit matches. Fixes: f56de278e8ec ("usb: typec: ucsi: acpi: Move to the new API") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-3-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 217355f1f9b94..26171c5d3c61c 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -73,9 +73,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &ua->flags); + if (ack) + set_bit(ACK_PENDING, &ua->flags); + else + set_bit(COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -85,7 +89,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, ret = -ETIMEDOUT; out_clear_bit: - clear_bit(COMMAND_PENDING, &ua->flags); + if (ack) + clear_bit(ACK_PENDING, &ua->flags); + else + clear_bit(COMMAND_PENDING, &ua->flags); return ret; } @@ -142,8 +149,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &ua->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) + complete(&ua->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && + test_bit(COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } -- GitLab From 2888258144e08d1f7905c0ee5df8c14d26d23699 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 Jan 2024 16:35:32 +0100 Subject: [PATCH 0050/1418] USB: hub: check for alternate port before enabling A_ALT_HNP_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f17c34ffc792bbb520e4b61baa16b6cfc7d44b13 upstream. The OTG 1.3 spec has the feature A_ALT_HNP_SUPPORT, which tells a device that it is connected to the wrong port. Some devices refuse to operate if you enable that feature, because it indicates to them that they ought to request to be connected to another port. According to the spec this feature may be used based only the following three conditions: 6.5.3 a_alt_hnp_support Setting this feature indicates to the B-device that it is connected to an A-device port that is not capable of HNP, but that the A-device does have an alternate port that is capable of HNP. The A-device is required to set this feature under the following conditions: • the A-device has multiple receptacles • the A-device port that connects to the B-device does not support HNP • the A-device has another port that does support HNP A check for the third and first condition is missing. Add it. Signed-off-by: Oliver Neukum Cc: stable Fixes: 7d2d641c44269 ("usb: otg: don't set a_alt_hnp_support feature for OTG 2.0 device") Link: https://lore.kernel.org/r/20240122153545.12284-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4f181110d00db..d960a56b760ec 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2389,17 +2389,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev) } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { - /* Set a_alt_hnp_support for legacy otg device */ - err = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_SET_FEATURE, 0, - USB_DEVICE_A_ALT_HNP_SUPPORT, - 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (err < 0) - dev_err(&udev->dev, - "set a_alt_hnp_support failed: %d\n", - err); + /* + * We are operating on a legacy OTP device + * These should be told that they are operating + * on the wrong port if we have another port that does + * support HNP + */ + if (bus->otg_port != 0) { + /* Set a_alt_hnp_support for legacy otg device */ + err = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_A_ALT_HNP_SUPPORT, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (err < 0) + dev_err(&udev->dev, + "set a_alt_hnp_support failed: %d\n", + err); + } } } #endif -- GitLab From 0ecc97c81e3aea3335ea4a0b0f1777aae42b71ec Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Tue, 23 Jan 2024 11:48:29 +0800 Subject: [PATCH 0051/1418] usb: f_mass_storage: forbid async queue when shutdown happen commit b2d2d7ea0dd09802cf5a0545bf54d8ad8987d20c upstream. When write UDC to empty and unbind gadget driver from gadget device, it is possible that there are many queue failures for mass storage function. The root cause is mass storage main thread alaways try to queue request to receive a command from host if running flag is on, on platform like dwc3, if pull down called, it will not queue request again and return -ESHUTDOWN, but it not affect running flag of mass storage function. Check return code from mass storage function and clear running flag if it is -ESHUTDOWN, also indicate start in/out transfer failure to break loops. Cc: stable Signed-off-by: yuan linyu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240123034829.3848409-1-yuanlinyu@hihonor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 7b9a4cf9b100c..d35f30a9cae2c 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -544,21 +544,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_SENDING; - if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq); + if (rc) { bh->state = BUF_STATE_EMPTY; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_RECEIVING; - if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq); + if (rc) { bh->state = BUF_STATE_FULL; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } -- GitLab From 57e2e42ccd3cd6183228269715ed032f44536751 Mon Sep 17 00:00:00 2001 From: Uttkarsh Aggarwal Date: Fri, 19 Jan 2024 15:18:25 +0530 Subject: [PATCH 0052/1418] usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend commit 61a348857e869432e6a920ad8ea9132e8d44c316 upstream. In current scenario if Plug-out and Plug-In performed continuously there could be a chance while checking for dwc->gadget_driver in dwc3_gadget_suspend, a NULL pointer dereference may occur. Call Stack: CPU1: CPU2: gadget_unbind_driver dwc3_suspend_common dwc3_gadget_stop dwc3_gadget_suspend dwc3_disconnect_gadget CPU1 basically clears the variable and CPU2 checks the variable. Consider CPU1 is running and right before gadget_driver is cleared and in parallel CPU2 executes dwc3_gadget_suspend where it finds dwc->gadget_driver which is not NULL and resumes execution and then CPU1 completes execution. CPU2 executes dwc3_disconnect_gadget where it checks dwc->gadget_driver is already NULL because of which the NULL pointer deference occur. Cc: stable@vger.kernel.org Fixes: 9772b47a4c29 ("usb: dwc3: gadget: Fix suspend/resume during device mode") Acked-by: Thinh Nguyen Signed-off-by: Uttkarsh Aggarwal Link: https://lore.kernel.org/r/20240119094825.26530-1-quic_uaggarwa@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c4703f6b20894..576c21bf77cda 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4583,15 +4583,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) unsigned long flags; int ret; - if (!dwc->gadget_driver) - return 0; - ret = dwc3_gadget_soft_disconnect(dwc); if (ret) goto err; spin_lock_irqsave(&dwc->lock, flags); - dwc3_disconnect_gadget(dwc); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; -- GitLab From 6616d3c4f8284a7b3ef978c916566bd240cea1c7 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 10 Jan 2024 15:16:26 +0200 Subject: [PATCH 0053/1418] interconnect: qcom: sc8180x: Mark CO0 BCM keepalive [ Upstream commit 85e985a4f46e462a37f1875cb74ed380e7c0c2e0 ] The CO0 BCM needs to be up at all times, otherwise some hardware (like the UFS controller) loses its connection to the rest of the SoC, resulting in a hang of the platform, accompanied by a spectacular logspam. Mark it as keepalive to prevent such cases. Fixes: 9c8c6bac1ae8 ("interconnect: qcom: Add SC8180x providers") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231214-topic-sc8180_fixes-v1-1-421904863006@linaro.org Signed-off-by: Georgi Djakov Signed-off-by: Sasha Levin --- drivers/interconnect/qcom/sc8180x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 83461e31774ec..d9ee193fb18bd 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1387,6 +1387,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .keepalive = true, .num_nodes = 1, .nodes = { &slv_qns_cdsp_mem_noc } }; -- GitLab From be76ad74a43f90f340f9f479e6b04f02125f6aef Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 09:14:19 +0100 Subject: [PATCH 0054/1418] media: ir_toy: fix a memleak in irtoy_tx [ Upstream commit dc9ceb90c4b42c6e5c6757df1d6257110433788e ] When irtoy_command fails, buf should be freed since it is allocated by irtoy_tx, or there is a memleak. Fixes: 4114978dcd24 ("media: ir_toy: prevent device from hanging during transmit") Signed-off-by: Zhipeng Lu Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/ir_toy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 1968067092594..69e630d85262f 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); + kfree(buf); return err; } @@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); + kfree(buf); return err; } -- GitLab From 2aaa9239c981d849129bd2f18dc98a1f6faa23c3 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:34 -0800 Subject: [PATCH 0055/1418] driver core: fw_devlink: Improve detection of overlapping cycles [ Upstream commit 6442d79d880cf7a2fff18779265d657fef0cce4c ] fw_devlink can detect most overlapping/intersecting cycles. However it was missing a few corner cases because of an incorrect optimization logic that tries to avoid repeating cycle detection for devices that are already marked as part of a cycle. Here's an example provided by Xu Yang (edited for clarity): usb +-----+ tcpc | | +-----+ | +--| | |----------->|EP| |--+ | | +--| |EP|<-----------| | |--+ | | B | | | +-----+ | A | | +-----+ | ^ +-----+ | | | | | +-----| C |<--+ | | +-----+ usb-phy Node A (tcpc) will be populated as device 1-0050. Node B (usb) will be populated as device 38100000.usb. Node C (usb-phy) will be populated as device 381f0040.usb-phy. The description below uses the notation: consumer --> supplier child ==> parent 1. Node C is populated as device C. No cycles detected because cycle detection is only run when a fwnode link is converted to a device link. 2. Node B is populated as device B. As we convert B --> C into a device link we run cycle detection and find and mark the device link/fwnode link cycle: C--> A --> B.EP ==> B --> C 3. Node A is populated as device A. As we convert C --> A into a device link, we see it's already part of a cycle (from step 2) and don't run cycle detection. Thus we miss detecting the cycle: A --> B.EP ==> B --> A.EP ==> A Looking at it another way, A depends on B in one way: A --> B.EP ==> B But B depends on A in two ways and we only detect the first: B --> C --> A B --> A.EP ==> A To detect both of these, we remove the incorrect optimization attempt in step 3 and run cycle detection even if the fwnode link from which the device link is being created has already been marked as part of a cycle. Reported-by: Xu Yang Closes: https://lore.kernel.org/lkml/DU2PR04MB8822693748725F85DC0CB86C8C792@DU2PR04MB8822.eurprd04.prod.outlook.com/ Fixes: 3fb16866b51d ("driver core: fw_devlink: Make cycle detection more robust") Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 191590055932f..3078f44dc1861 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2056,9 +2056,14 @@ static int fw_devlink_create_devlink(struct device *con, /* * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So cycle detection isn't necessary and shouldn't be done. + * So, one might expect that cycle detection isn't necessary for them. + * However, if the device link was marked as SYNC_STATE_ONLY because + * it's part of a cycle, then we still need to do cycle detection. This + * is because the consumer and supplier might be part of multiple cycles + * and we need to detect all those cycles. */ - if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) { + if (!device_link_flag_is_sync_state_only(flags) || + flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); -- GitLab From 0c09912dd8387e228afcc5e34ac5d79b1e3a1058 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Tue, 23 Jan 2024 09:45:59 +0800 Subject: [PATCH 0056/1418] powerpc/kasan: Fix addr error caused by page alignment [ Upstream commit 4a7aee96200ad281a5cc4cf5c7a2e2a49d2b97b0 ] In kasan_init_region, when k_start is not page aligned, at the begin of for loop, k_cur = k_start & PAGE_MASK is less than k_start, and then `va = block + k_cur - k_start` is less than block, the addr va is invalid, because the memory address space from va to block is not alloced by memblock_alloc, which will not be reserved by memblock_reserve later, it will be used by other places. As a result, memory overwriting occurs. for example: int __init __weak kasan_init_region(void *start, size_t size) { [...] /* if say block(dcd97000) k_start(feef7400) k_end(feeff3fe) */ block = memblock_alloc(k_end - k_start, PAGE_SIZE); [...] for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { /* at the begin of for loop * block(dcd97000) va(dcd96c00) k_cur(feef7000) k_start(feef7400) * va(dcd96c00) is less than block(dcd97000), va is invalid */ void *va = block + k_cur - k_start; [...] } [...] } Therefore, page alignment is performed on k_start before memblock_alloc() to ensure the validity of the VA address. Fixes: 663c0c9496a6 ("powerpc/kasan: Fix shadow area set up for modules.") Signed-off-by: Jiangfeng Xiao Signed-off-by: Michael Ellerman Link: https://msgid.link/1705974359-43790-1-git-send-email-xiaojiangfeng@huawei.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/kasan/init_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index a70828a6d9357..aa9aa11927b2f 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size) if (ret) return ret; + k_start = k_start & PAGE_MASK; block = memblock_alloc(k_end - k_start, PAGE_SIZE); if (!block) return -ENOMEM; -- GitLab From 7190353835b4a219abb70f90b06cdcae97f11512 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:18:46 +0300 Subject: [PATCH 0057/1418] cifs: fix underflow in parse_server_interfaces() [ Upstream commit cffe487026be13eaf37ea28b783d9638ab147204 ] In this loop, we step through the buffer and after each item we check if the size_left is greater than the minimum size we need. However, the problem is that "bytes_left" is type ssize_t while sizeof() is type size_t. That means that because of type promotion, the comparison is done as an unsigned and if we have negative bytes left the loop continues instead of ending. Fixes: fe856be475f7 ("CIFS: parse and store info on iface queries") Signed-off-by: Dan Carpenter Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 5a157000bdfe6..34d1262004dfb 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -613,7 +613,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - while (bytes_left >= sizeof(*p)) { + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); tmp_iface.speed = le64_to_cpu(p->LinkSpeed); tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; -- GitLab From 083870b029c06da6a9a49340dd78637eec35a1d4 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Mon, 12 Feb 2024 18:22:39 +0530 Subject: [PATCH 0058/1418] i2c: qcom-geni: Correct I2C TRE sequence [ Upstream commit 83ef106fa732aea8558253641cd98e8a895604d7 ] For i2c read operation in GSI mode, we are getting timeout due to malformed TRE basically incorrect TRE sequence in gpi(drivers/dma/qcom/gpi.c) driver. I2C driver has geni_i2c_gpi(I2C_WRITE) function which generates GO TRE and geni_i2c_gpi(I2C_READ)generates DMA TRE. Hence to generate GO TRE before DMA TRE, we should move geni_i2c_gpi(I2C_WRITE) before geni_i2c_gpi(I2C_READ) inside the I2C GSI mode transfer function i.e. geni_i2c_gpi_xfer(). TRE stands for Transfer Ring Element - which is basically an element with size of 4 words. It contains all information like slave address, clk divider, dma address value data size etc). Mainly we have 3 TREs(Config, GO and DMA tre). - CONFIG TRE : consists of internal register configuration which is required before start of the transfer. - DMA TRE : contains DDR/Memory address, called as DMA descriptor. - GO TRE : contains Transfer directions, slave ID, Delay flags, Length of the transfer. I2c driver calls GPI driver API to config each TRE depending on the protocol. For read operation tre sequence will be as below which is not aligned to hardware programming guide. - CONFIG tre - DMA tre - GO tre As per Qualcomm's internal Hardware Programming Guide, we should configure TREs in below sequence for any RX only transfer. - CONFIG tre - GO tre - DMA tre Fixes: d8703554f4de ("i2c: qcom-geni: Add support for GPI DMA") Reviewed-by: Andi Shyti Reviewed-by: Bryan O'Donoghue Tested-by: Bryan O'Donoghue # qrb5165-rb5 Co-developed-by: Mukesh Kumar Savaliya Signed-off-by: Mukesh Kumar Savaliya Signed-off-by: Viken Dadhaniya Reviewed-by: Dmitry Baryshkov Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-qcom-geni.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 8fce98bb77ff9..75b9c3f26bba6 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -605,20 +605,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.addr = msgs[i].addr; + ret = geni_i2c_gpi(gi2c, &msgs[i], &config, + &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); + if (ret) + goto err; + if (msgs[i].flags & I2C_M_RD) { ret = geni_i2c_gpi(gi2c, &msgs[i], &config, &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); if (ret) goto err; - } - - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, - &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); - if (ret) - goto err; - if (msgs[i].flags & I2C_M_RD) dma_async_issue_pending(gi2c->rx_c); + } + dma_async_issue_pending(gi2c->tx_c); timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); -- GitLab From c12920ff9b570e8aefb3f02a41edd33257d30802 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 30 Jan 2024 16:27:20 +0800 Subject: [PATCH 0059/1418] irqchip/loongson-eiointc: Use correct struct type in eiointc_domain_alloc() [ Upstream commit f1c2765c6afcd1f71f76ed8c9bf94acedab4cecb ] eiointc_domain_alloc() uses struct eiointc, which is not defined, for a pointer. Older compilers treat that as a forward declaration and due to assignment of a void pointer there is no warning emitted. As the variable is then handed in as a void pointer argument to irq_domain_set_info() the code is functional. Use struct eiointc_priv instead. [ tglx: Rewrote changelog ] Fixes: dd281e1a1a93 ("irqchip: Add Loongson Extended I/O interrupt controller support") Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Acked-by: Huacai Chen Link: https://lore.kernel.org/r/20240130082722.2912576-2-maobibo@loongson.cn Signed-off-by: Sasha Levin --- drivers/irqchip/irq-loongson-eiointc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 3d99b8bdd8ef1..de115ee6e9ec7 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -242,7 +242,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, int ret; unsigned int i, type; unsigned long hwirq = 0; - struct eiointc *priv = domain->host_data; + struct eiointc_priv *priv = domain->host_data; ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); if (ret) -- GitLab From 4297217bcf1f0948a19c2bacc6b68d92e7778ad9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Feb 2024 17:42:44 +1100 Subject: [PATCH 0060/1418] powerpc/kasan: Limit KASAN thread size increase to 32KB [ Upstream commit f1acb109505d983779bbb7e20a1ee6244d2b5736 ] KASAN is seen to increase stack usage, to the point that it was reported to lead to stack overflow on some 32-bit machines (see link). To avoid overflows the stack size was doubled for KASAN builds in commit 3e8635fb2e07 ("powerpc/kasan: Force thread size increase with KASAN"). However with a 32KB stack size to begin with, the doubling leads to a 64KB stack, which causes build errors: arch/powerpc/kernel/switch.S:249: Error: operand out of range (0x000000000000fe50 is not between 0xffffffffffff8000 and 0x0000000000007fff) Although the asm could be reworked, in practice a 32KB stack seems sufficient even for KASAN builds - the additional usage seems to be in the 2-3KB range for a 64-bit KASAN build. So only increase the stack for KASAN if the stack size is < 32KB. Fixes: 18f14afe2816 ("powerpc/64s: Increase default stack size to 32KB") Reported-by: Spoorthy Reported-by: Benjamin Gray Reviewed-by: Benjamin Gray Link: https://lore.kernel.org/linuxppc-dev/bug-207129-206035@https.bugzilla.kernel.org%2F/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240212064244.3924505-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index af58f1ed3952e..c4b798aa6ce80 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15 #define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) #else #define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT -- GitLab From 0e01ccadfdf7be79ac99276905cb590cce632f10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:19:04 +0100 Subject: [PATCH 0061/1418] i2c: pasemi: split driver into two separate modules [ Upstream commit f44bff19268517ee98e80e944cad0f04f1db72e3 ] On powerpc, it is possible to compile test both the new apple (arm) and old pasemi (powerpc) drivers for the i2c hardware at the same time, which leads to a warning about linking the same object file twice: scripts/Makefile.build:244: drivers/i2c/busses/Makefile: i2c-pasemi-core.o is added to multiple modules: i2c-apple i2c-pasemi Rework the driver to have an explicit helper module, letting Kbuild take care of whether this should be built-in or a loadable driver. Fixes: 9bc5f4f660ff ("i2c: pasemi: Split pci driver to its own file") Signed-off-by: Arnd Bergmann Reviewed-by: Sven Peter Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/Makefile | 6 ++---- drivers/i2c/busses/i2c-pasemi-core.c | 5 +++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index e73cdb1d2b5a8..784a803279d99 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -89,10 +89,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o -i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o -obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o -i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o -obj-$(CONFIG_I2C_APPLE) += i2c-apple.o +obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o +obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PNX) += i2c-pnx.o obj-$(CONFIG_I2C_PXA) += i2c-pxa.o diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 9028ffb58cc07..f297e41352e7a 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -356,3 +356,8 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } +EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson "); +MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver"); -- GitLab From 491528935c9c48bf341d8b40eabc6c4fc5df6f2c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 14 Feb 2024 15:59:39 +0100 Subject: [PATCH 0062/1418] i2c: i801: Fix block process call transactions [ Upstream commit c1c9d0f6f7f1dbf29db996bd8e166242843a5f21 ] According to the Intel datasheets, software must reset the block buffer index twice for block process call transactions: once before writing the outgoing data to the buffer, and once again before reading the incoming data from the buffer. The driver is currently missing the second reset, causing the wrong portion of the block buffer to be read. Signed-off-by: Jean Delvare Reported-by: Piotr Zakowski Closes: https://lore.kernel.org/linux-i2c/20240213120553.7b0ab120@endymion.delvare/ Fixes: 315cd67c9453 ("i2c: i801: Add Block Write-Block Read Process Call support") Reviewed-by: Alexander Sverdlin Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-i801.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 3159ffbb77a20..9a4e9bf304c28 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -500,11 +500,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, /* Set block buffer mode */ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, return -EPROTO; data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } -- GitLab From 3e409fb74007a6d1dc85073a0eb67681bd49f6a5 Mon Sep 17 00:00:00 2001 From: Radek Krejci Date: Wed, 14 Feb 2024 10:14:07 +0100 Subject: [PATCH 0063/1418] modpost: trim leading spaces when processing source files list [ Upstream commit 5d9a16b2a4d9e8fa028892ded43f6501bc2969e5 ] get_line() does not trim the leading spaces, but the parse_source_files() expects to get lines with source files paths where the first space occurs after the file path. Fixes: 70f30cfe5b89 ("modpost: use read_text_file() and get_line() for reading text files") Signed-off-by: Radek Krejci Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/sumversion.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6bf9caca09684..a72e6cf61a1f0 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) /* Sum all files in the same dir or subdirs. */ while ((line = get_line(&pos))) { - char* p = line; + char* p; + + /* trim the leading spaces away */ + while (isspace(*line)) + line++; + p = line; if (strncmp(line, "source_", sizeof("source_")-1) == 0) { p = strrchr(line, ' '); -- GitLab From 7857e35ef10e1a9cd81204a6250f820477a17fb8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 11 Aug 2023 17:57:26 +0200 Subject: [PATCH 0064/1418] mptcp: get rid of msk->subflow commit 39880bd808ad2ddfb9b7fee129568c3b814f0609 upstream. This is a partial backport of the upstram commit 39880bd808ad ("mptcp: get rid of msk->subflow"). It's partial to avoid a long a complex dependency chain not suitable for stable. The only bit remaning from the original commit is the introduction of a new field avoid a race at close time causing an UaF: BUG: KASAN: use-after-free in mptcp_subflow_queue_clean+0x2c9/0x390 include/net/mptcp.h:104 Read of size 1 at addr ffff88803bf72884 by task syz-executor.6/23092 CPU: 0 PID: 23092 Comm: syz-executor.6 Not tainted 6.1.65-gc6114c845984 #50 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x125/0x18f lib/dump_stack.c:106 print_report+0x163/0x4f0 mm/kasan/report.c:284 kasan_report+0xc4/0x100 mm/kasan/report.c:495 mptcp_subflow_queue_clean+0x2c9/0x390 include/net/mptcp.h:104 mptcp_check_listen_stop+0x190/0x2a0 net/mptcp/protocol.c:3009 __mptcp_close+0x9a/0x970 net/mptcp/protocol.c:3024 mptcp_close+0x2a/0x130 net/mptcp/protocol.c:3089 inet_release+0x13d/0x190 net/ipv4/af_inet.c:429 sock_close+0xcf/0x230 net/socket.c:652 __fput+0x3a2/0x870 fs/file_table.c:320 task_work_run+0x24e/0x300 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop+0xa4/0xc0 kernel/entry/common.c:171 exit_to_user_mode_prepare+0x51/0x90 kernel/entry/common.c:204 syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:286 do_syscall_64+0x53/0xa0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x41d791 Code: 75 14 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 74 2a 00 00 c3 48 83 ec 08 e8 9a fc ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 e3 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00000000008bfb90 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 000000000041d791 RDX: 0000001b33920000 RSI: ffffffff8139adff RDI: 0000000000000003 RBP: 000000000079d980 R08: 0000001b33d20000 R09: 0000000000000951 R10: 000000008139a955 R11: 0000000000000293 R12: 00000000000c739b R13: 000000000079bf8c R14: 00007fa301053000 R15: 00000000000c705a Allocated by task 22528: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x40/0x70 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0xa0/0xb0 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:955 [inline] __kmalloc+0xaa/0x1c0 mm/slab_common.c:968 kmalloc include/linux/slab.h:558 [inline] sk_prot_alloc+0xac/0x200 net/core/sock.c:2038 sk_clone_lock+0x56/0x1090 net/core/sock.c:2236 inet_csk_clone_lock+0x26/0x420 net/ipv4/inet_connection_sock.c:1141 tcp_create_openreq_child+0x30/0x1910 net/ipv4/tcp_minisocks.c:474 tcp_v6_syn_recv_sock+0x413/0x1a90 net/ipv6/tcp_ipv6.c:1283 subflow_syn_recv_sock+0x621/0x1300 net/mptcp/subflow.c:730 tcp_get_cookie_sock+0xf0/0x5f0 net/ipv4/syncookies.c:201 cookie_v6_check+0x130f/0x1c50 net/ipv6/syncookies.c:261 tcp_v6_do_rcv+0x7e0/0x12b0 net/ipv6/tcp_ipv6.c:1147 tcp_v6_rcv+0x2494/0x2f50 net/ipv6/tcp_ipv6.c:1743 ip6_protocol_deliver_rcu+0xba3/0x1620 net/ipv6/ip6_input.c:438 ip6_input+0x1bc/0x470 net/ipv6/ip6_input.c:483 ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:302 __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5525 process_backlog+0x353/0x660 net/core/dev.c:5967 __napi_poll+0xc6/0x5a0 net/core/dev.c:6534 net_rx_action+0x652/0xea0 net/core/dev.c:6601 __do_softirq+0x176/0x525 kernel/softirq.c:571 Freed by task 23093: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x40/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x50 mm/kasan/generic.c:516 ____kasan_slab_free+0x13a/0x1b0 mm/kasan/common.c:236 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook mm/slub.c:1750 [inline] slab_free mm/slub.c:3661 [inline] __kmem_cache_free+0x1eb/0x340 mm/slub.c:3674 sk_prot_free net/core/sock.c:2074 [inline] __sk_destruct+0x4ad/0x620 net/core/sock.c:2160 tcp_v6_rcv+0x269c/0x2f50 net/ipv6/tcp_ipv6.c:1761 ip6_protocol_deliver_rcu+0xba3/0x1620 net/ipv6/ip6_input.c:438 ip6_input+0x1bc/0x470 net/ipv6/ip6_input.c:483 ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:302 __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5525 process_backlog+0x353/0x660 net/core/dev.c:5967 __napi_poll+0xc6/0x5a0 net/core/dev.c:6534 net_rx_action+0x652/0xea0 net/core/dev.c:6601 __do_softirq+0x176/0x525 kernel/softirq.c:571 The buggy address belongs to the object at ffff88803bf72000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 2180 bytes inside of 4096-byte region [ffff88803bf72000, ffff88803bf73000) The buggy address belongs to the physical page: page:00000000a72e4e51 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x3bf70 head:00000000a72e4e51 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x100000000010200(slab|head|node=0|zone=1) raw: 0100000000010200 ffffea0000a0ea00 dead000000000002 ffff888100042140 raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88803bf72780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803bf72800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88803bf72880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88803bf72900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803bf72980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Prevent the MPTCP worker from freeing the first subflow for unaccepted socket when such sockets transition to TCP_CLOSE state, and let that happen at accept() or listener close() time. Fixes: b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Reported-by: Christoph Paasch Tested-by: Christoph Paasch Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 9 ++++----- net/mptcp/protocol.h | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 76539d1004ebb..db5369b6442d0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2422,7 +2422,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out_release; } - dispose_it = !msk->subflow || ssk != msk->subflow->sk; + dispose_it = msk->free_first || ssk != msk->first; if (dispose_it) list_del(&subflow->node); @@ -2440,7 +2440,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { __mptcp_subflow_disconnect(ssk, subflow, flags); - msk->subflow->state = SS_UNCONNECTED; release_sock(ssk); goto out; @@ -3341,10 +3340,10 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ mptcp_dispose_initial_subflow(msk); + + /* allow the following to close even the initial subflow */ + msk->free_first = 1; mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4ec8e0a81b5a4..e5d553dfc13fd 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -287,7 +287,8 @@ struct mptcp_sock { cork:1, nodelay:1, fastopening:1, - in_accept_queue:1; + in_accept_queue:1, + free_first:1; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; -- GitLab From 6673d9f1c2cd984390550dbdf7d5ae07b20abbf8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 22:49:46 +0100 Subject: [PATCH 0065/1418] mptcp: fix data re-injection from stale subflow commit b6c620dc43ccb4e802894e54b651cf81495e9598 upstream. When the MPTCP PM detects that a subflow is stale, all the packet scheduler must re-inject all the mptcp-level unacked data. To avoid acquiring unneeded locks, it first try to check if any unacked data is present at all in the RTX queue, but such check is currently broken, as it uses TCP-specific helper on an MPTCP socket. Funnily enough fuzzers and static checkers are happy, as the accessed memory still belongs to the mptcp_sock struct, and even from a functional perspective the recovery completed successfully, as the short-cut test always failed. A recent unrelated TCP change - commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") - exposed the issue, as the tcp field reorganization makes the mptcp code always skip the re-inection. Fix the issue dropping the bogus call: we are on a slow path, the early optimization proved once again to be evil. Fixes: 1e1d9d6f119c ("mptcp: handle pending data on closed subflow") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/468 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-1-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index db5369b6442d0..0bff0105093f6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2336,9 +2336,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(mptcp_sk(sk))) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue -- GitLab From 66e142fbe13fa57ea305a7ba1f60297d0a467f44 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:47 +0100 Subject: [PATCH 0066/1418] selftests: mptcp: add missing kconfig for NF Filter commit 3645c844902bd4e173d6704fc2a37e8746904d67 upstream. Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table. It is then required to have IP_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae8..2a00bf4acdfa8 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,6 +22,7 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_ACT_CSUM=m -- GitLab From 2c7337ec22bc1a34f2fc4b4c2ae0f5e4b1a45b63 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:48 +0100 Subject: [PATCH 0067/1418] selftests: mptcp: add missing kconfig for NF Filter in v6 commit 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa upstream. Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table for IPv6. It is then required to have IP6_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-3-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2a00bf4acdfa8..26fe466f803dd 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -25,6 +25,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y -- GitLab From fc0e9cff9db02701dcb09bb70cf1531abab9b422 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:49 +0100 Subject: [PATCH 0068/1418] selftests: mptcp: add missing kconfig for NF Mangle commit 2d41f10fa497182df9012d3e95d9cea24eb42e61 upstream. Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Mangle table, only in IPv4. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-4-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 26fe466f803dd..4f80014cae494 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -23,6 +23,7 @@ CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m -- GitLab From d50d031919b259e49957616f3e79b72a7e7b99aa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:50 +0100 Subject: [PATCH 0069/1418] selftests: mptcp: increase timeout to 30 min commit 4d4dfb2019d7010efb65926d9d1c1793f9a367c6 upstream. On very slow environments -- e.g. when QEmu is used without KVM --, mptcp_join.sh selftest can take a bit more than 20 minutes. Bump the default timeout by 50% as it seems normal to take that long on some environments. When a debug kernel config is used, this selftest will take even longer, but that's certainly not a common test env to consider for the timeout. The Fixes tag that has been picked here is there simply to help having this patch backported to older stable versions. It is difficult to point to the exact commit that made some env reaching the timeout from time to time. Fixes: d17b968b9876 ("selftests: mptcp: increase timeout to 20 minutes") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-5-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db6..abc5648b59abd 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 -- GitLab From d288d2e3e65a3a3b7b4e0eb708c394145e5120d5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:49 +0100 Subject: [PATCH 0070/1418] mptcp: drop the push_pending field commit bdd70eb68913c960acb895b00a8c62eb64715b1f upstream. Such field is there to avoid acquiring the data lock in a few spots, but it adds complexity to the already non trivial locking schema. All the relevant call sites (mptcp-level re-injection, set socket options), are slow-path, drop such field in favor of 'cb_flags', adding the relevant locking. This patch could be seen as an improvement, instead of a fix. But it simplifies the next patch. The 'Fixes' tag has been added to help having this series backported to stable. Fixes: e9d09baca676 ("mptcp: avoid atomic bit manipulation when possible") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 12 ++++++------ net/mptcp/protocol.h | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0bff0105093f6..859b18cb8e4f6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1582,8 +1582,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, void mptcp_check_and_set_pending(struct sock *sk) { - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); + if (mptcp_send_head(sk)) { + mptcp_data_lock(sk); + mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); + mptcp_data_unlock(sk); + } } void __mptcp_push_pending(struct sock *sk, unsigned int flags) @@ -3140,7 +3143,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->last_snd = NULL; WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; - msk->push_pending = 0; msk->recovery = false; msk->can_ack = false; msk->fully_established = false; @@ -3384,8 +3386,7 @@ static void mptcp_release_cb(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); struct list_head join_list; if (!flags) @@ -3401,7 +3402,6 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e5d553dfc13fd..259672cc344f3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -272,7 +272,6 @@ struct mptcp_sock { int rmem_released; unsigned long flags; unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; -- GitLab From e373bfc8ec3d6496ec7e11dd7f4d087a44b1009a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 8 Feb 2024 19:03:53 +0100 Subject: [PATCH 0071/1418] mptcp: check addrs list in userspace_pm_get_local_id commit f012d796a6de662692159c539689e47e662853a8 upstream. Before adding a new entry in mptcp_userspace_pm_get_local_id(), it's better to check whether this address is already in userspace pm local address list. If it's in the list, no need to add a new entry, just return it's address ID and use this address. Fixes: 8b20137012d9 ("mptcp: read attributes of addr entries managed by userspace PMs") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 38cbdc66d8bff..2e1e0d0e3ec60 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -132,10 +132,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry new_entry; + struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&e->addr, skc, false)) { + entry = e; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; -- GitLab From 75500e7ba2a2719ff8a6721a5ac17954eae55bf9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 18 Dec 2023 08:54:00 +0100 Subject: [PATCH 0072/1418] media: Revert "media: rkisp1: Drop IRQF_SHARED" commit a107d643b2a3382e0a2d2c4ef08bf8c6bff4561d upstream. This reverts commit 85d2a31fe4d9be1555f621ead7a520d8791e0f74. The rkisp1 does share interrupt lines on some platforms, after all. Thus we need to revert this, and implement a fix for the rkisp1 shared irq handling in a follow-up patch. Closes: https://lore.kernel.org/all/87o7eo8vym.fsf@gmail.com/ Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-1-173007628248@ideasonboard.com Reported-by: Mikhail Rudenko Signed-off-by: Tomi Valkeinen Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index aeb6bb63667eb..41abb18b00acb 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -559,7 +559,7 @@ static int rkisp1_probe(struct platform_device *pdev) rkisp1->irqs[il] = irq; } - ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0, + ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED, dev_driver_string(dev), dev); if (ret) { dev_err(dev, "request irq failed: %d\n", ret); -- GitLab From 6bb22ac1d11d7d20f91e7fd2e657a9e5f6db65e0 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 9 Feb 2024 10:07:34 -0800 Subject: [PATCH 0073/1418] scsi: Revert "scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock" commit 977fe773dcc7098d8eaf4ee6382cb51e13e784cb upstream. This reverts commit 1a1975551943f681772720f639ff42fbaa746212. This commit causes interrupts to be lost for FCoE devices, since it changed sping locks from "bh" to "irqsave". Instead, a work queue should be used, and will be addressed in a separate commit. Fixes: 1a1975551943 ("scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock") Signed-off-by: Lee Duncan Link: https://lore.kernel.org/r/c578cdcd46b60470535c4c4a953e6a1feca0dffd.1707500786.git.lduncan@suse.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fcoe/fcoe_ctlr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 8a4124e7d2043..ddc048069af25 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip) { struct fcoe_fcf *sel; struct fcoe_fcf *fcf; - unsigned long flags; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = NULL; list_for_each_entry(fcf, &fip->fcfs, list) fcf->flogi_sent = 0; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); sel = fip->sel_fcf; if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr)) @@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, { struct fc_frame *fp; struct fc_frame_header *fh; - unsigned long flags; u16 old_xid; u8 op; u8 mac[ETH_ALEN]; @@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, op = FIP_DT_FLOGI; if (fip->mode == FIP_MODE_VN2VN) break; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = skb; fip->flogi_req_send = 1; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); schedule_work(&fip->timer_work); return -EINPROGRESS; case ELS_FDISC: @@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip) static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; int error; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n"); fcf = fcoe_ctlr_select(fip); if (!fcf || fcf->flogi_sent) { @@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) fcoe_ctlr_solicit(fip, NULL); error = fcoe_ctlr_flogi_send_locked(fip); } - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); mutex_unlock(&fip->ctlr_mutex); return error; } @@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); fcf = fip->sel_fcf; if (!fcf || !fip->flogi_req_send) goto unlock; @@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) } else /* XXX */ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n"); unlock: - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); } /** -- GitLab From ff70e6ff6fc2413caf33410af7462d1f584d927e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Feb 2024 23:52:54 -0600 Subject: [PATCH 0074/1418] Revert "drm/amd: flush any delayed gfxoff on suspend entry" commit 916361685319098f696b798ef1560f69ed96e934 upstream. commit ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") caused GFXOFF control to be used more heavily and the codepath that was removed from commit 0dee72639533 ("drm/amd: flush any delayed gfxoff on suspend entry") now can be exercised at suspend again. Users report that by using GNOME to suspend the lockscreen trigger will cause SDMA traffic and the system can deadlock. This reverts commit 0dee726395333fea833eaaf838bc80962df886c8. Acked-by: Alex Deucher Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 ++++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4b91f95066eca..6a4749c0c5a58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4203,7 +4203,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 23f0067f92e4e..b803e785d3aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -585,8 +585,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { -- GitLab From dc3890441c9eb21b28d9831b269d394f4674ab29 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jan 2024 19:14:14 +0100 Subject: [PATCH 0075/1418] drm/virtio: Set segment size for virtio_gpu device commit 9c64e749cebd9c2d3d55261530a98bcccb83b950 upstream. Set the segment size of the virtio_gpu device to the value used by the drm helpers when allocating sg lists to fix the following complaint from DMA_API debug code: DMA-API: virtio-pci 0000:07:00.0: mapping sg segment longer than device claims to support [len=262144] [max=65536] Cc: stable@vger.kernel.org Tested-by: Zhenyu Zhang Acked-by: Vivek Kasireddy Signed-off-by: Sebastian Ott Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/7258a4cc-da16-5c34-a042-2a23ee396d56@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 0035affc3e590..9b2d235168bb6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -93,6 +93,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; -- GitLab From d0302e2a5732fdc9706bc5d21ac349a58f6f7d15 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 11:44:03 +0100 Subject: [PATCH 0076/1418] lsm: fix the logic in security_inode_getsecctx() commit 99b817c173cd213671daecd25ca27f56b0c7c4ec upstream. The inode_getsecctx LSM hook has previously been corrected to have -EOPNOTSUPP instead of 0 as the default return value to fix BPF LSM behavior. However, the call_int_hook()-generated loop in security_inode_getsecctx() was left treating 0 as the neutral value, so after an LSM returns 0, the loop continues to try other LSMs, and if one of them returns a non-zero value, the function immediately returns with said value. So in a situation where SELinux and the BPF LSMs registered this hook, -EOPNOTSUPP would be incorrectly returned whenever SELinux returned 0. Fix this by open-coding the call_int_hook() loop and making it use the correct LSM_RET_DEFAULT() value as the neutral one, similar to what other hooks do. Cc: stable@vger.kernel.org Reported-by: Stephen Smalley Link: https://lore.kernel.org/selinux/CAEjxPJ4ev-pasUwGx48fDhnmjBnq_Wh90jYPwRQRAqXxmOKD4Q@mail.gmail.com/ Link: https://bugzilla.redhat.com/show_bug.cgi?id=2257983 Fixes: b36995b8609a ("lsm: fix default return value for inode_getsecctx") Signed-off-by: Ondrej Mosnacek Reviewed-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/security.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/security/security.c b/security/security.c index fc15b963e1028..1b504c296551c 100644 --- a/security/security.c +++ b/security/security.c @@ -2186,7 +2186,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); -- GitLab From e1aae84f42ecaca62afff24aa741ca3b7a8c2906 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 0077/1418] firewire: core: correct documentation of fw_csr_string() kernel API commit 5f9ab17394f831cb7986ec50900fa37507a127f1 upstream. Against its current description, the kernel API can accepts all types of directory entries. This commit corrects the documentation. Cc: stable@vger.kernel.org Fixes: 3c2c58cb33b3 ("firewire: core: fw_csr_string addendum") Link: https://lore.kernel.org/r/20240130100409.30128-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/core-device.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 74bab06283b71..1879ec27c0236 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -100,10 +100,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ -- GitLab From 38acb2e9be54afbc2914ff4eaf3b4b2ab5e67320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Relvas?= Date: Wed, 31 Jan 2024 11:34:09 +0000 Subject: [PATCH 0078/1418] ALSA: hda/realtek: Apply headset jack quirk for non-bass alc287 thinkpads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2468e8922d2f6da81a6192b73023eff67e3fefdd upstream. There currently exists two thinkpad headset jack fixups: ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK ALC285_FIXUP_THINKPAD_HEADSET_JACK The latter is applied to alc285 and alc287 thinkpads which contain bass speakers. However, the former was only being applied to alc285 thinkpads, leaving non-bass alc287 thinkpads with no headset button controls. This patch fixes that by adding ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK to the alc287 chains, allowing the detection of headset buttons. Signed-off-by: José Relvas Cc: Link: https://lore.kernel.org/r/20240131113407.34698-3-josemonsantorelvas@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 274f528eba0ba..0c386820861cf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9377,7 +9377,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC245_FIXUP_HP_MUTE_LED_COEFBIT] = { .type = HDA_FIXUP_FUNC, @@ -9392,6 +9392,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_THINKPAD_I2S_SPK] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, -- GitLab From 76690354e6ace5ed2e0c579906bce44d7ccc9e4e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 12 Feb 2024 19:05:10 -0700 Subject: [PATCH 0079/1418] kbuild: Fix changing ELF file type for output of gen_btf for big endian commit e3a9ee963ad8ba677ca925149812c5932b49af69 upstream. Commit 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") changed the ELF type of .btf.vmlinux.bin.o to ET_REL via dd, which works fine for little endian platforms: 00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 03 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| +00000010 01 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| However, for big endian platforms, it changes the wrong byte, resulting in an invalid ELF file type, which ld.lld rejects: 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 01 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: : 103 ld.lld: error: .btf.vmlinux.bin.o: unknown file type Fix this by updating the entire 16-bit e_type field rather than just a single byte, so that everything works correctly for all platforms and linkers. 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 00 01 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: REL (Relocatable file) While in the area, update the comment to mention that binutils 2.35+ matches LLD's behavior of rejecting an ET_EXEC input, which occurred after the comment was added. Cc: stable@vger.kernel.org Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Link: https://github.com/llvm/llvm-project/pull/75643 Suggested-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Reviewed-by: Nicolas Schier Reviewed-by: Kees Cook Reviewed-by: Justin Stitt Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/link-vmlinux.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943cf03..458b2948b580d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -135,8 +135,13 @@ gen_btf() ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \ --strip-all ${1} ${2} 2>/dev/null # Change e_type to ET_REL so that it can be used to link final vmlinux. - # Unlike GNU ld, lld does not allow an ET_EXEC input. - printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none + # GNU ld 2.35+ and lld do not allow an ET_EXEC input. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + et_rel='\0\1' + else + et_rel='\1\0' + fi + printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none } # Create ${2} .S file with all symbols from the ${1} object file -- GitLab From 5c0c5ffaed73cbae6c317374dc32ba6cacc60895 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 25 Jan 2024 12:53:09 +0300 Subject: [PATCH 0080/1418] nfc: nci: free rx_data_reassembly skb on NCI device cleanup commit bfb007aebe6bff451f7f3a4be19f4f286d0d5d9c upstream. rx_data_reassembly skb is stored during NCI data exchange for processing fragmented packets. It is dropped only when the last fragment is processed or when an NTF packet with NCI_OP_RF_DEACTIVATE_NTF opcode is received. However, the NCI device may be deallocated before that which leads to skb leak. As by design the rx_data_reassembly skb is bound to the NCI device and nothing prevents the device to be freed before the skb is processed in some way and cleaned, free it on the NCI device cleanup. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Cc: stable@vger.kernel.org Reported-by: syzbot+6b7c68d9c21e4ee4251b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000f43987060043da7b@google.com/ Signed-off-by: Fedor Pchelkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7535afd1537e9..b5071a2f597d4 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1207,6 +1207,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); -- GitLab From 56440799fc4621c279df16176f83a995d056023a Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jan 2024 02:21:47 -0800 Subject: [PATCH 0081/1418] net: hsr: remove WARN_ONCE() in send_hsr_supervision_frame() commit 37e8c97e539015637cb920d3e6f1e404f707a06e upstream. Syzkaller reported [1] hitting a warning after failing to allocate resources for skb in hsr_init_skb(). Since a WARN_ONCE() call will not help much in this case, it might be prudent to switch to netdev_warn_once(). At the very least it will suppress syzkaller reports such as [1]. Just in case, use netdev_warn_once() in send_prp_supervision_frame() for similar reasons. [1] HSR: Could not send supervision frame WARNING: CPU: 1 PID: 85 at net/hsr/hsr_device.c:294 send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 RIP: 0010:send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 ... Call Trace: hsr_announce+0x114/0x370 net/hsr/hsr_device.c:382 call_timer_fn+0x193/0x590 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x764/0xb20 kernel/time/timer.c:2022 run_timer_softirq+0x58/0xd0 kernel/time/timer.c:2035 __do_softirq+0x21a/0x8de kernel/softirq.c:553 invoke_softirq kernel/softirq.c:427 [inline] __irq_exit_rcu kernel/softirq.c:632 [inline] irq_exit_rcu+0xb7/0x120 kernel/softirq.c:644 sysvec_apic_timer_interrupt+0x95/0xb0 arch/x86/kernel/apic/apic.c:1076 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:649 ... This issue is also found in older kernels (at least up to 5.10). Cc: stable@vger.kernel.org Reported-by: syzbot+3ae0a3f42c84074b7c8e@syzkaller.appspotmail.com Fixes: 121c33b07b31 ("net: hsr: introduce common code for skb initialization") Signed-off-by: Nikita Zhandarovich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b1e86a7265b32..83906d093f0ae 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -291,7 +291,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -338,7 +338,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } -- GitLab From 55e891f4a27269847b151b6bfd77ac720408d176 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:41 +0100 Subject: [PATCH 0082/1418] net: stmmac: do not clear TBS enable bit on link up/down commit 4896bb7c0b31a0a3379b290ea7729900c59e0c69 upstream. With the dma conf being reallocated on each call to stmmac_open(), any information in there is lost, unless we specifically handle it. The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc therefore would stop working when link was set down and then back up. Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Cc: stable@vger.kernel.org Signed-off-by: Esben Haabendal Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e988a60c8561b..66178ce6d000e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3826,6 +3826,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); -- GitLab From 6286435cd06c560d46bcc22e5d6b411eec464c43 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 29 Jan 2024 14:03:08 +0100 Subject: [PATCH 0083/1418] xen-netback: properly sync TX responses commit 7b55984c96ffe9e236eb9c82a2196e0b1f84990d upstream. Invoking the make_tx_response() / push_tx_responses() pair with no lock held would be acceptable only if all such invocations happened from the same context (NAPI instance or dealloc thread). Since this isn't the case, and since the interface "spec" also doesn't demand that multicast operations may only be performed with no in-flight transmits, MCAST_{ADD,DEL} processing also needs to acquire the response lock around the invocations. To prevent similar mistakes going forward, "downgrade" the present functions to private helpers of just the two remaining ones using them directly, with no forward declarations anymore. This involves renaming what so far was make_tx_response(), for the new function of that name to serve the new (wrapper) purpose. While there, - constify the txp parameters, - correct xenvif_idx_release()'s status parameter's type, - rename {,_}make_tx_response()'s status parameters for consistency with xenvif_idx_release()'s. Fixes: 210c34dcd8d9 ("xen-netback: add support for multicast control") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/980c6c3d-e10e-4459-8565-e8fbde122f00@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 84 +++++++++++++++---------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 2716040985748..0d51c900c5538 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -997,7 +981,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1009,7 +992,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1444,8 +1426,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + RING_IDX i = queue->tx.rsp_prod_pvt; + struct xen_netif_tx_response *resp; + + resp = RING_GET_RESPONSE(&queue->tx, i); + resp->id = txp->id; + resp->status = status; + + while (extra_count-- != 0) + RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + + queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +} + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) + s8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; @@ -1455,8 +1464,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_lock_irqsave(&queue->response_lock, flags); - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); /* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the @@ -1470,32 +1479,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_unlock_irqrestore(&queue->response_lock, flags); } - static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { - RING_IDX i = queue->tx.rsp_prod_pvt; - struct xen_netif_tx_response *resp; - - resp = RING_GET_RESPONSE(&queue->tx, i); - resp->id = txp->id; - resp->status = st; - - while (extra_count-- != 0) - RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + unsigned long flags; - queue->tx.rsp_prod_pvt = ++i; -} + spin_lock_irqsave(&queue->response_lock, flags); -static void push_tx_responses(struct xenvif_queue *queue) -{ - int notify; + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); + spin_unlock_irqrestore(&queue->response_lock, flags); } static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) -- GitLab From a5767decf74343fd4808b751c100161e2b489ffe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Jun 2023 18:41:59 +0900 Subject: [PATCH 0084/1418] modpost: propagate W=1 build option to modpost commit 20ff36856fe00879f82de71fe6f1482ca1b72334 upstream. "No build warning" is a strong requirement these days, so you must fix all issues before enabling a new warning flag. We often add a new warning to W=1 first so that the kbuild test robot blocks new breakages. This commit allows modpost to show extra warnings only when W=1 (or KBUILD_EXTRA_WARN=1) is given. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile.modpost | 1 + scripts/mod/modpost.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index e41dee64d429c..39aea753d0bdc 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -44,6 +44,7 @@ modpost-args = \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ + $(if $(findstring 1, $(KBUILD_EXTRA_WARN)),-W) \ -o $@ # 'make -i -k' ignores compile errors, and builds as many modules as possible. diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e6be7fc2625fd..9ef2f6423e808 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -41,6 +41,8 @@ static bool allow_missing_ns_imports; static bool error_occurred; +static bool extra_warn; + /* * Cut off the warnings when there are too many. This typically occurs when * vmlinux is missing. ('make modules' without building vmlinux.) @@ -2290,7 +2292,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:mnT:o:awENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:mnT:o:aWwENd:")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2315,6 +2317,9 @@ int main(int argc, char **argv) case 'T': files_source = optarg; break; + case 'W': + extra_warn = true; + break; case 'w': warn_unresolved = true; break; -- GitLab From 999ecc936a99cdd3392a652fa3686e74bae5d671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 30 Sep 2023 18:52:04 +0200 Subject: [PATCH 0085/1418] modpost: Don't let "driver"s reference .exit.* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f177cd0c15fcc7bdbb68d8d1a3166dead95314c8 upstream. Drivers must not reference functions marked with __exit as these likely are not available when the code is built-in. There are few creative offenders uncovered for example in ARCH=amd64 allmodconfig builds. So only trigger the section mismatch warning for W=1 builds. The dual rule that drivers must not reference .init.* is implemented since commit 0db252452378 ("modpost: don't allow *driver to reference .init.*") which however missed that .exit.* should be handled in the same way. Thanks to Masahiro Yamada and Arnd Bergmann who gave valuable hints to find this improvement. Signed-off-by: Uwe Kleine-König Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 9ef2f6423e808..9a4a220ecbc26 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1087,9 +1087,20 @@ static int secref_whitelist(const struct sectioncheck *mismatch, "*_console"))) return 0; - /* symbols in data sections that may refer to meminit/exit sections */ + /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_EXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_XXXEXIT_SECTIONS)) && + match(fromsym, PATTERNS("*driver"))) + return 0; + + /* + * symbols in data sections must not refer to .exit.*, but there are + * quite a few offenders, so hide these unless for W=1 builds until + * these are fixed. + */ + if (!extra_warn && + match(fromsec, PATTERNS(DATA_SECTIONS)) && + match(tosec, PATTERNS(EXIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; -- GitLab From 519b7da44ee4d84402522e860642a20183cb86db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 23 Oct 2023 02:06:05 +0900 Subject: [PATCH 0086/1418] linux/init: remove __memexit* annotations commit 6a4e59eeedc3018cb57722eecfcbb49431aeb05f upstream. We have never used __memexit, __memexitdata, or __memexitconst. These were unneeded. Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann [nathan: Remove additional case of XXXEXIT_TO_SOME_EXIT due to lack of 78dac1a22944 in 6.1] Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 6 ------ include/linux/init.h | 3 --- scripts/mod/modpost.c | 16 +++------------- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7ad6f51b3d914..1d1f480a5e9e4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -351,7 +351,6 @@ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data*) \ - MEM_KEEP(exit.data*) \ *(.data.unlikely) \ __start_once = .; \ *(.data.once) \ @@ -546,7 +545,6 @@ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ MEM_KEEP(init.rodata) \ - MEM_KEEP(exit.rodata) \ } \ \ /* Built-in module parameters. */ \ @@ -601,7 +599,6 @@ *(.ref.text) \ *(.text.asan.* .text.tsan.*) \ MEM_KEEP(init.text*) \ - MEM_KEEP(exit.text*) \ /* sched.text is aling to function alignment to secure we have same @@ -751,13 +748,10 @@ *(.exit.data .exit.data.*) \ *(.fini_array .fini_array.*) \ *(.dtors .dtors.*) \ - MEM_DISCARD(exit.data*) \ - MEM_DISCARD(exit.rodata*) #define EXIT_TEXT \ *(.exit.text) \ *(.text.exit) \ - MEM_DISCARD(exit.text) #define EXIT_CALL \ *(.exitcall.exit) diff --git a/include/linux/init.h b/include/linux/init.h index 077d7f93b402f..c96aea3229ca1 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -87,9 +87,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 9a4a220ecbc26..61a13d55c0e31 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -811,7 +811,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_TEXT_SECTIONS \ ".init.text", ".meminit.text" #define ALL_EXIT_TEXT_SECTIONS \ - ".exit.text", ".memexit.text" + ".exit.text" #define ALL_PCI_INIT_SECTIONS \ ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ @@ -819,10 +819,9 @@ static void check_section(const char *modname, struct elf_info *elf, ".pci_fixup_resume_early", ".pci_fixup_suspend" #define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS -#define ALL_XXXEXIT_SECTIONS MEM_EXIT_SECTIONS #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS -#define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS +#define ALL_EXIT_SECTIONS EXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ @@ -835,7 +834,6 @@ static void check_section(const char *modname, struct elf_info *elf, #define MEM_INIT_SECTIONS ".meminit.*" #define EXIT_SECTIONS ".exit.*" -#define MEM_EXIT_SECTIONS ".memexit.*" #define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \ TEXT_SECTIONS, OTHER_TEXT_SECTIONS @@ -864,7 +862,6 @@ enum mismatch { TEXT_TO_ANY_EXIT, DATA_TO_ANY_EXIT, XXXINIT_TO_SOME_INIT, - XXXEXIT_TO_SOME_EXIT, ANY_INIT_TO_ANY_EXIT, ANY_EXIT_TO_ANY_INIT, EXPORT_TO_INIT_EXIT, @@ -939,12 +936,6 @@ static const struct sectioncheck sectioncheck[] = { .bad_tosec = { INIT_SECTIONS, NULL }, .mismatch = XXXINIT_TO_SOME_INIT, }, -/* Do not reference exit code/data from memexit code/data */ -{ - .fromsec = { ALL_XXXEXIT_SECTIONS, NULL }, - .bad_tosec = { EXIT_SECTIONS, NULL }, - .mismatch = XXXEXIT_TO_SOME_EXIT, -}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, @@ -1089,7 +1080,7 @@ static int secref_whitelist(const struct sectioncheck *mismatch, /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_XXXEXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; @@ -1267,7 +1258,6 @@ static void report_sec_mismatch(const char *modname, case TEXT_TO_ANY_EXIT: case DATA_TO_ANY_EXIT: case XXXINIT_TO_SOME_INIT: - case XXXEXIT_TO_SOME_EXIT: case ANY_INIT_TO_ANY_EXIT: case ANY_EXIT_TO_ANY_INIT: warn("%s: section mismatch in reference: %s (section: %s) -> %s (section: %s)\n", -- GitLab From 36fbcadc208e968008ed3ffb89cb2a265c4f276c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Dec 2022 11:35:29 -0700 Subject: [PATCH 0087/1418] modpost: Include '.text.*' in TEXT_SECTIONS commit 19331e84c3873256537d446afec1f6c507f8c4ef upstream. Commit 6c730bfc894f ("modpost: handle -ffunction-sections") added ".text.*" to the OTHER_TEXT_SECTIONS macro to fix certain section mismatch warnings. Unfortunately, this makes it impossible for modpost to warn about section mismatches with LTO, which implies '-ffunction-sections', as all functions are put in their own '.text.' sections, which may still reference functions in sections they are not supposed to, such as __init. Fix this by moving ".text.*" into TEXT_SECTIONS, so that configurations with '-ffunction-sections' will see warnings about mismatched sections. Link: https://lore.kernel.org/Y39kI3MOtVI5BAnV@google.com/ Reported-by: Vincent Donnefort Reviewed-and-tested-by: Alexander Lobakin Reviewed-by: Sami Tolvanen Tested-by: Vincent Donnefort Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 61a13d55c0e31..233f8eae09a67 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -824,10 +824,10 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_EXIT_SECTIONS EXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" -#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ +#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" #define INIT_SECTIONS ".init.*" -- GitLab From 064cb9dd10ffd637c212ca9afa3617961975d1fb Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:54 -0700 Subject: [PATCH 0088/1418] um: Fix adding '-no-pie' for clang commit 846cfbeed09b45d985079a9173cf390cc053715b upstream. The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link vmlinux with -no-pie") added the compiler linker flag '-no-pie' via cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer. While this works for GCC, this does not work for clang because cc-option uses '-c', which stops the pipeline right before linking, so '-no-pie' is unconsumed and clang warns, causing cc-option to fail just as it would if the option was entirely unsupported: $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument] A recent version of clang exposes this because it generates a relocation under '-mcmodel=large' that is not supported in PIE mode: /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE /usr/sbin/ld: failed to set dynamic section sizes: bad value clang: error: linker command failed with exit code 1 (use -v to see invocation) Remove the cc-option check altogether. It is wasteful to invoke the compiler to check for '-no-pie' because only one supported compiler version does not support it, GCC 5.x (as it is supported with the minimum version of clang and GCC 6.1.0+). Use a combination of the gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add '-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all compilers that support this. Furthermore, using gcc-min-version can help turn this back into LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie when the minimum version of GCC is bumped past 6.1.0. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1982 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/um/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 3dbd0e3b660ea..778c50f273992 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -118,7 +118,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ -- GitLab From 5953f2c7f84d633fd9cbe5975c298fa04308bcb5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:55 -0700 Subject: [PATCH 0089/1418] modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS commit 397586506c3da005b9333ce5947ad01e8018a3be upstream. After the linked LLVM change, building ARCH=um defconfig results in a segmentation fault in modpost. Prior to commit a23e7584ecf3 ("modpost: unify 'sym' and 'to' in default_mismatch_handler()"), there was a warning: WARNING: modpost: vmlinux.o(__ex_table+0x88): Section mismatch in reference to the .ltext:(unknown) WARNING: modpost: The relocation at __ex_table+0x88 references section ".ltext" which is not in the list of authorized sections. If you're adding a new section and/or if this reference is valid, add ".ltext" to the list of authorized sections to jump to on fault. This can be achieved by adding ".ltext" to OTHER_TEXT_SECTIONS in scripts/mod/modpost.c. The linked LLVM change moves global objects to the '.ltext' (and '.ltext.*' with '-ffunction-sections') sections with '-mcmodel=large', which ARCH=um uses. These sections should be handled just as '.text' and '.text.*' are, so add them to TEXT_SECTIONS. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1981 Link: https://github.com/llvm/llvm-project/commit/4bf8a688956a759b7b6b8d94f42d25c13c7af130 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 233f8eae09a67..686eed37f9781 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -825,7 +825,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" -- GitLab From d27f6d6eacacd8d25584e8c5cc59cfaebc81ca70 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Thu, 1 Feb 2024 09:21:14 -0300 Subject: [PATCH 0090/1418] ALSA: hda/realtek: Enable headset mic on Vaio VJFE-ADL commit c7de2d9bb68a5fc71c25ff96705a80a76c8436eb upstream. Vaio VJFE-ADL is equipped with ALC269VC, and it needs ALC298_FIXUP_SPK_VOLUME quirk to make its headset mic work. Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20240201122114.30080-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0c386820861cf..6e94161e46ce1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10053,6 +10053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), -- GitLab From a6bc85847272c4d7bd887ab9abdcd7213ff5dee7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:06 +0100 Subject: [PATCH 0091/1418] ASoC: codecs: wcd938x: handle deferred probe commit 086df711d9b886194481b4fbe525eb43e9ae7403 upstream. WCD938x sound codec driver ignores return status of getting regulators and returns EINVAL instead of EPROBE_DEFER. If regulator provider probes after the codec, system is left without probed audio: wcd938x_codec audio-codec: wcd938x_probe: Fail to obtain platform data wcd938x_codec: probe of audio-codec failed with error -22 Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a2abd1a111612..e80be4e4fa8b4 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3588,7 +3588,7 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_populate_dt_data(wcd938x, dev); if (ret) { dev_err(dev, "%s: Fail to obtain platform data\n", __func__); - return -EINVAL; + return ret; } ret = wcd938x_add_slave_components(wcd938x, dev, &match); -- GitLab From 6c65eb988d4adff9f3a472d8484de4503641ee43 Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Mon, 22 Jan 2024 18:47:10 +0000 Subject: [PATCH 0092/1418] ALSA: hda/cs8409: Suppress vmaster control for Dolphin models commit a2ed0a44d637ef9deca595054c206da7d6cbdcbc upstream. Customer has reported an issue with specific desktop platform where two CS42L42 codecs are connected to CS8409 HDA bridge. If "Master Volume Control" is created then on Ubuntu OS UCM left/right balance slider in UI audio settings has no effect. This patch will fix this issue for a target paltform. Fixes: 20e507724113 ("ALSA: hda/cs8409: Add support for dolphin") Signed-off-by: Vitaly Rodionov Cc: Link: https://lore.kernel.org/r/20240122184710.5802-1-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cs8409.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 627899959ffe8..e41316e2e9833 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1; spec->scodecs[CS8409_CODEC1]->codec = codec; spec->num_scodecs = 2; + spec->gen.suppress_vmaster = 1; codec->patch_ops = cs8409_dolphin_patch_ops; -- GitLab From f33789ca65d5e93c40389caffac927aa712d22a8 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Mon, 22 Jan 2024 15:48:24 +0800 Subject: [PATCH 0093/1418] ALSA: hda/realtek: fix mute/micmute LEDs for HP ZBook Power commit 1513664f340289cf10402753110f3cff12a738aa upstream. The HP ZBook Power using ALC236 codec which using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240122074826.1020964-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6e94161e46ce1..153a9605d3a97 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9726,6 +9726,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), -- GitLab From 90e09c016d72b91e76de25f71c7b93d94cc3c769 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Wed, 31 Jan 2024 21:53:46 +0000 Subject: [PATCH 0094/1418] binder: signal epoll threads of self-work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 97830f3c3088638ff90b20dfba2eb4d487bf14d7 upstream. In (e)poll mode, threads often depend on I/O events to determine when data is ready for consumption. Within binder, a thread may initiate a command via BINDER_WRITE_READ without a read buffer and then make use of epoll_wait() or similar to consume any responses afterwards. It is then crucial that epoll threads are signaled via wakeup when they queue their own work. Otherwise, they risk waiting indefinitely for an event leaving their work unhandled. What is worse, subsequent commands won't trigger a wakeup either as the thread has pending work. Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Cc: Arve Hjønnevåg Cc: Martijn Coenen Cc: Alice Ryhl Cc: Steven Moreland Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20240131215347.1808751-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d933ef6cc65af..55cd17a13e758 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -477,6 +477,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread, { WARN_ON(!list_empty(&thread->waiting_thread_node)); binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + thread->process_todo = true; } -- GitLab From 4ab56381ac2621594270f165c211f0954af3ea2e Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Mon, 8 Jan 2024 17:18:33 +0530 Subject: [PATCH 0095/1418] misc: fastrpc: Mark all sessions as invalid in cb_remove commit a4e61de63e34860c36a71d1a364edba16fb6203b upstream. In remoteproc shutdown sequence, rpmsg_remove will get called which would depopulate all the child nodes that have been created during rpmsg_probe. This would result in cb_remove call for all the context banks for the remoteproc. In cb_remove function, session 0 is getting skipped which is not correct as session 0 will never become available again. Add changes to mark session 0 also as invalid. Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Cc: stable Signed-off-by: Ekansh Gupta Link: https://lore.kernel.org/r/20240108114833.20480-1-quic_ekangupt@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index cc57cc8204328..69cc24962706c 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1990,7 +1990,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev) int i; spin_lock_irqsave(&cctx->lock, flags); - for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) { if (cctx->session[i].sid == sess->sid) { cctx->session[i].valid = false; cctx->sesscount--; -- GitLab From 185eab30486ba3e7bf8b9c2e049c79a06ffd2bc1 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:33 +0800 Subject: [PATCH 0096/1418] ext4: fix double-free of blocks due to wrong extents moved_len commit 55583e899a5357308274601364741a83e78d6ac4 upstream. In ext4_move_extents(), moved_len is only updated when all moves are successfully executed, and only discards orig_inode and donor_inode preallocations when moved_len is not zero. When the loop fails to exit after successfully moving some extents, moved_len is not updated and remains at 0, so it does not discard the preallocations. If the moved extents overlap with the preallocated extents, the overlapped extents are freed twice in ext4_mb_release_inode_pa() and ext4_process_freed_data() (as described in commit 94d7c16cbbbd ("ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT")), and bb_free is incremented twice. Hence when trim is executed, a zero-division bug is triggered in mb_update_avg_fragment_size() because bb_free is not zero and bb_fragments is zero. Therefore, update move_len after each extent move to avoid the issue. Reported-by: Wei Chen Reported-by: xingwei lee Closes: https://lore.kernel.org/r/CAO4mrferzqBUnCag8R3m2zf897ts9UEuhjFQGPtODT92rYyR2Q@mail.gmail.com Fixes: fcf6b1b729bc ("ext4: refactor ext4_move_extents code base") CC: # 3.18 Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/move_extent.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index dedc9d445f243..8e3ff150bc36b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -621,6 +621,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -675,7 +676,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -685,9 +686,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { -- GitLab From ac894a1e19b9ba2386dca85548a5c4fc106b9392 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:36 +0800 Subject: [PATCH 0097/1418] ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks() commit 2331fd4a49864e1571b4f50aa3aa1536ed6220d0 upstream. After updating bb_free in mb_free_blocks, it is possible to return without updating bb_fragments because the block being freed is found to have already been freed, which leads to inconsistency between bb_free and bb_fragments. Since the group may be unlocked in ext4_grp_locked_error(), this can lead to problems such as dividing by zero when calculating the average fragment length. Hence move the update of bb_free to after the block double-free check guarantees that the corresponding statistics are updated only after the core block bitmap is modified. Fixes: eabe0444df90 ("ext4: speed-up releasing blocks on commit") CC: # 3.10 Suggested-by: Jan Kara Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 40903c172a34f..1a310ee7d9e55 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1785,11 +1785,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1803,23 +1798,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } else { - mb_regenerate_buddy(e4b); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1844,9 +1847,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } -- GitLab From 65bf19f55a87a13417b859965926f54ae16117b3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 9 Feb 2024 06:36:22 -0500 Subject: [PATCH 0098/1418] tracing: Fix wasted memory in saved_cmdlines logic commit 44dc5c41b5b1267d4dd037d26afc0c4d3a568acb upstream. While looking at improving the saved_cmdlines cache I found a huge amount of wasted memory that should be used for the cmdlines. The tracing data saves pids during the trace. At sched switch, if a trace occurred, it will save the comm of the task that did the trace. This is saved in a "cache" that maps pids to comms and exposed to user space via the /sys/kernel/tracing/saved_cmdlines file. Currently it only caches by default 128 comms. The structure that uses this creates an array to store the pids using PID_MAX_DEFAULT (which is usually set to 32768). This causes the structure to be of the size of 131104 bytes on 64 bit machines. In hex: 131104 = 0x20020, and since the kernel allocates generic memory in powers of two, the kernel would allocate 0x40000 or 262144 bytes to store this structure. That leaves 131040 bytes of wasted space. Worse, the structure points to an allocated array to store the comm names, which is 16 bytes times the amount of names to save (currently 128), which is 2048 bytes. Instead of allocating a separate array, make the structure end with a variable length string and use the extra space for that. This is similar to a recommendation that Linus had made about eventfs_inode names: https://lore.kernel.org/all/20240130190355.11486-5-torvalds@linux-foundation.org/ Instead of allocating a separate string array to hold the saved comms, have the structure end with: char saved_cmdlines[]; and round up to the next power of two over sizeof(struct saved_cmdline_buffers) + num_cmdlines * TASK_COMM_LEN It will use this extra space for the saved_cmdline portion. Now, instead of saving only 128 comms by default, by using this wasted space at the end of the structure it can save over 8000 comms and even saves space by removing the need for allocating the other array. Link: https://lore.kernel.org/linux-trace-kernel/20240209063622.1f7b6d5f@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Cc: Sven Schnelle Cc: Mete Durlu Fixes: 939c7a4f04fcd ("tracing: Introduce saved_cmdlines_size file") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 75 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2b3c4cd8382b3..fa6c193e22f02 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2249,7 +2249,7 @@ struct saved_cmdlines_buffer { unsigned *map_cmdline_to_pid; unsigned cmdline_num; int cmdline_idx; - char *saved_cmdlines; + char saved_cmdlines[]; }; static struct saved_cmdlines_buffer *savedcmd; @@ -2263,47 +2263,58 @@ static inline void set_cmdline(int idx, const char *cmdline) strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) { + int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); + + kfree(s->map_cmdline_to_pid); + free_pages((unsigned long)s, order); +} + +static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) +{ + struct saved_cmdlines_buffer *s; + struct page *page; + int orig_size, size; + int order; + + /* Figure out how much is needed to hold the given number of cmdlines */ + orig_size = sizeof(*s) + val * TASK_COMM_LEN; + order = get_order(orig_size); + size = 1 << (order + PAGE_SHIFT); + page = alloc_pages(GFP_KERNEL, order); + if (!page) + return NULL; + + s = page_address(page); + memset(s, 0, sizeof(*s)); + + /* Round up to actual allocation */ + val = (size - sizeof(*s)) / TASK_COMM_LEN; + s->cmdline_num = val; + s->map_cmdline_to_pid = kmalloc_array(val, sizeof(*s->map_cmdline_to_pid), GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; + if (!s->map_cmdline_to_pid) { + free_saved_cmdlines_buffer(s); + return NULL; } s->cmdline_idx = 0; - s->cmdline_num = val; memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - return 0; + return s; } static int trace_create_savedcmd(void) { - int ret; - - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; + savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } - - return 0; + return savedcmd ? 0 : -ENOMEM; } int is_tracing_stopped(void) @@ -5972,26 +5983,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = allocate_cmdlines_buffer(val); if (!s) return -ENOMEM; - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; -- GitLab From c794117a33699bc2f14748cb4fa088b71804d0d8 Mon Sep 17 00:00:00 2001 From: David Schiller Date: Mon, 22 Jan 2024 14:49:17 +0100 Subject: [PATCH 0099/1418] staging: iio: ad5933: fix type mismatch regression commit 6db053cd949fcd6254cea9f2cd5d39f7bd64379c upstream. Commit 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") fixed a compiler warning, but introduced a bug that resulted in one of the two 16 bit IIO channels always being zero (when both are enabled). This is because int is 32 bits wide on most architectures and in the case of a little-endian machine the two most significant bytes would occupy the buffer for the second channel as 'val' is being passed as a void pointer to 'iio_push_to_buffers()'. Fix by defining 'val' as u16. Tested working on ARM64. Fixes: 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") Signed-off-by: David Schiller Link: https://lore.kernel.org/r/20240122134916.2137957-1-david.schiller@jku.at Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/impedance-analyzer/ad5933.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index f177b20f0f2d9..ceba632138940 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work) struct ad5933_state, work.work); struct iio_dev *indio_dev = i2c_get_clientdata(st->client); __be16 buf[2]; - int val[2]; + u16 val[2]; unsigned char status; int ret; -- GitLab From 176256ff8abff29335ecff905a09fb49e8dcf513 Mon Sep 17 00:00:00 2001 From: "zhili.liu" Date: Tue, 2 Jan 2024 09:07:11 +0800 Subject: [PATCH 0100/1418] iio: magnetometer: rm3100: add boundary check for the value read from RM3100_REG_TMRC commit 792595bab4925aa06532a14dd256db523eb4fa5e upstream. Recently, we encounter kernel crash in function rm3100_common_probe caused by out of bound access of array rm3100_samp_rates (because of underlying hardware failures). Add boundary check to prevent out of bound access. Fixes: 121354b2eceb ("iio: magnetometer: Add driver support for PNI RM3100") Suggested-by: Zhouyi Zhou Signed-off-by: zhili.liu Link: https://lore.kernel.org/r/1704157631-3814-1-git-send-email-zhouzhouyi@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/rm3100-core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 69938204456f8..42b70cd42b393 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) struct rm3100_data *data; unsigned int tmp; int ret; + int samp_rate_index; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp); if (ret < 0) return ret; + + samp_rate_index = tmp - RM3100_TMRC_OFFSET; + if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) { + dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n"); + return -EINVAL; + } /* Initializing max wait time, which is double conversion time. */ - data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2] - * 2; + data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2; /* Cycle count values may not be what we want. */ if ((tmp - RM3100_TMRC_OFFSET) == 0) -- GitLab From 359f220d0e753bba840eac19ffedcdc816b532f2 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 8 Dec 2023 15:31:19 +0800 Subject: [PATCH 0101/1418] iio: core: fix memleak in iio_device_register_sysfs commit 95a0d596bbd0552a78e13ced43f2be1038883c81 upstream. When iio_device_register_sysfs_group() fails, we should free iio_dev_opaque->chan_attr_group.attrs to prevent potential memleak. Fixes: 32f171724e5c ("iio: core: rework iio device group creation") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231208073119.29283-1-dinghao.liu@zju.edu.cn Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index c9614982cb671..a2f8278f00856 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1601,10 +1601,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) - goto error_clear_attrs; + goto error_free_chan_attrs; return 0; +error_free_chan_attrs: + kfree(iio_dev_opaque->chan_attr_group.attrs); + iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); -- GitLab From 4f10423c0e34d7eaed61478c56520017d5eddf3e Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 31 Jan 2024 10:16:47 +0100 Subject: [PATCH 0102/1418] iio: commom: st_sensors: ensure proper DMA alignment commit 862cf85fef85becc55a173387527adb4f076fab0 upstream. Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for st_sensors common buffer. While at it, moved the odr_lock before buffer_data as we definitely don't want any other data to share a cacheline with the buffer. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable") Signed-off-by: Nuno Sa Cc: Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/common/st_sensors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index db4a1b260348c..c34e648f07e28 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -261,9 +261,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER -- GitLab From 77ba1a86ef2208ddfe3b9efc98e6006520cd8ab2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 31 Jan 2024 16:52:46 -0600 Subject: [PATCH 0103/1418] iio: accel: bma400: Fix a compilation problem commit 4cb81840d8f29b66d9d05c6d7f360c9560f7e2f4 upstream. The kernel fails when compiling without `CONFIG_REGMAP_I2C` but with `CONFIG_BMA400`. ``` ld: drivers/iio/accel/bma400_i2c.o: in function `bma400_i2c_probe': bma400_i2c.c:(.text+0x23): undefined reference to `__devm_regmap_init_i2c' ``` Link: https://download.01.org/0day-ci/archive/20240131/202401311634.FE5CBVwe-lkp@intel.com/config Fixes: 465c811f1f20 ("iio: accel: Add driver for the BMA400") Fixes: 9bea10642396 ("iio: accel: bma400: add support for bma400 spi") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240131225246.14169-1-mario.limonciello@amd.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index ffac66db7ac92..1f34747a68bfe 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -219,10 +219,12 @@ config BMA400 config BMA400_I2C tristate + select REGMAP_I2C depends on BMA400 config BMA400_SPI tristate + select REGMAP_SPI depends on BMA400 config BMC150_ACCEL -- GitLab From 18cbe28671e2eacb19ff107ef67c4f74de606528 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 13:41:03 +0100 Subject: [PATCH 0104/1418] iio: adc: ad_sigma_delta: ensure proper DMA alignment commit 59598510be1d49e1cff7fd7593293bb8e1b2398b upstream. Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db39261592cf@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/adc/ad_sigma_delta.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714c..719cf9cc6e1ac 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; -- GitLab From 9e105dd8c070a58c080381491452f038e7b78039 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 14:10:49 +0100 Subject: [PATCH 0105/1418] iio: imu: adis: ensure proper DMA alignment commit 8e98b87f515d8c4bae521048a037b2cc431c3fd5 upstream. Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/imu/adis.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index bcbefb7574751..af083aa0c4317 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include #include +#include #include #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; -- GitLab From b79e15569d1a65f187344f97c7617e08fbccdf62 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 10 Jan 2024 10:56:11 -0800 Subject: [PATCH 0106/1418] iio: imu: bno055: serdev requires REGMAP commit 35ec2d03b282a939949090bd8c39eb37a5856721 upstream. There are a ton of build errors when REGMAP is not set, so select REGMAP to fix all of them. Examples (not all of them): ../drivers/iio/imu/bno055/bno055_ser_core.c:495:15: error: variable 'bno055_ser_regmap_bus' has initializer but incomplete type 495 | static struct regmap_bus bno055_ser_regmap_bus = { ../drivers/iio/imu/bno055/bno055_ser_core.c:496:10: error: 'struct regmap_bus' has no member named 'write' 496 | .write = bno055_ser_write_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c:497:10: error: 'struct regmap_bus' has no member named 'read' 497 | .read = bno055_ser_read_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c: In function 'bno055_ser_probe': ../drivers/iio/imu/bno055/bno055_ser_core.c:532:18: error: implicit declaration of function 'devm_regmap_init'; did you mean 'vmem_map_init'? [-Werror=implicit-function-declaration] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c:532:16: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c: At top level: ../drivers/iio/imu/bno055/bno055_ser_core.c:495:26: error: storage size of 'bno055_ser_regmap_bus' isn't known 495 | static struct regmap_bus bno055_ser_regmap_bus = { Fixes: 2eef5a9cc643 ("iio: imu: add BNO055 serdev driver") Signed-off-by: Randy Dunlap Cc: Andrea Merello Cc: Jonathan Cameron Cc: Lars-Peter Clausen Cc: linux-iio@vger.kernel.org Cc: Link: https://lore.kernel.org/r/20240110185611.19723-1-rdunlap@infradead.org Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/bno055/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index 83e53acfbe880..c7f5866a177d9 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -8,6 +8,7 @@ config BOSCH_BNO055 config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" depends on SERIAL_DEV_BUS + select REGMAP select BOSCH_BNO055 help Enable this to support Bosch BNO055 IMUs attached via UART. -- GitLab From 9f6087851ec6dce5b15f694aeaf3e8ec8243224e Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Apr 2023 10:50:32 +0200 Subject: [PATCH 0107/1418] media: rc: bpf attach/detach requires write permission commit 6a9d552483d50953320b9d3b57abdee8d436f23f upstream. Note that bpf attach/detach also requires CAP_NET_ADMIN. Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/bpf-lirc.c | 6 +++--- drivers/media/rc/lirc_dev.c | 5 ++++- drivers/media/rc/rc-core-priv.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e810..52d82cbe7685f 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 184e0b35744f3..adb8c794a2d7b 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1af7fc..7df949fc65e2b 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} -- GitLab From 7505a0ce08584b12d5c7ca4559725424972d803c Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 5 Feb 2024 14:19:16 +0300 Subject: [PATCH 0108/1418] ksmbd: free aux buffer if ksmbd_iov_pin_rsp_read fails commit 108a020c64434fed4b69762879d78cd24088b4c7 upstream. ksmbd_iov_pin_rsp_read() doesn't free the provided aux buffer if it fails. Seems to be the caller's responsibility to clear the buffer in error case. Found by Linux Verification Center (linuxtesting.org). Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 4cfa45c2727ea..66d25d0e34d8b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6171,8 +6171,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, @@ -6382,8 +6384,10 @@ int smb2_read(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } ksmbd_fd_put(work, fp); return 0; -- GitLab From 01e9f82058e208747323513eef3df738051eff0e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Mar 2023 17:40:32 +0800 Subject: [PATCH 0109/1418] xfrm: Remove inner/outer modes from output path commit f4796398f21b9844017a2dac883b1dd6ad6edd60 upstream. The inner/outer modes were added to abstract out common code that were once duplicated between IPv4 and IPv6. As time went on the abstractions have been removed and we are now left with empty shells that only contain duplicate information. These can be removed one-by-one as the same information is already present elsewhere in the xfrm_state object. Just like the input-side, removing this from the output code makes it possible to use transport-mode SAs underneath an inter-family tunnel mode SA. Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Cc: Sri Sakthi Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_output.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9a5e79a38c679..07a7ee43b8ae2 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -414,7 +414,7 @@ static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; skb->protocol = htons(ETH_P_IP); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm4_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -437,7 +437,7 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) skb->ignore_df = 1; skb->protocol = htons(ETH_P_IPV6); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm6_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -453,22 +453,22 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) { - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_prepare_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_prepare_output(x, skb); break; case XFRM_MODE_TRANSPORT: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_output(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_ro_output(x, skb); WARN_ON_ONCE(1); break; @@ -866,21 +866,10 @@ static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode; - - if (x->sel.family == AF_UNSPEC) - inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb_dst(skb)->ops->family)); - else - inner_mode = &x->inner_mode; - - if (inner_mode == NULL) - return -EAFNOSUPPORT; - - switch (inner_mode->family) { - case AF_INET: + switch (skb->protocol) { + case htons(ETH_P_IP): return xfrm4_extract_output(x, skb); - case AF_INET6: + case htons(ETH_P_IPV6): return xfrm6_extract_output(x, skb); } -- GitLab From ba5f95788345b2740de6db12225354781bf8407e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Mar 2023 17:26:05 +0800 Subject: [PATCH 0110/1418] xfrm: Remove inner/outer modes from input path commit 5f24f41e8ea62a6a9095f9bbafb8b3aebe265c68 upstream. The inner/outer modes were added to abstract out common code that were once duplicated between IPv4 and IPv6. As time went on the abstractions have been removed and we are now left with empty shells that only contain duplicate information. These can be removed one-by-one as the same information is already present elsewhere in the xfrm_state object. Removing them from the input path actually allows certain valid combinations that are currently disallowed. In particular, when a transport mode SA sits beneath a tunnel mode SA that changes address families, at present the transport mode SA cannot have AF_UNSPEC as its selector because it will be erroneously be treated as inter-family itself even though it simply sits beneath one. This is a serious problem because you can't set the selector to non-AF_UNSPEC either as that will cause the selector match to fail as we always match selectors to the inner-most traffic. Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Cc: Sri Sakthi Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 66 +++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ac1a645afa8df..deda4955c0466 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -232,9 +232,6 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -270,8 +267,6 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -332,22 +327,26 @@ out: */ static int xfrm_inner_mode_encap_remove(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: + case IPPROTO_BEETPH: return xfrm4_remove_beet_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_beet_encap(x, skb); + } break; case XFRM_MODE_TUNNEL: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: return xfrm4_remove_tunnel_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_tunnel_encap(x, skb); break; + } } WARN_ON_ONCE(1); @@ -356,9 +355,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode = &x->inner_mode; - - switch (x->outer_mode.family) { + switch (x->props.family) { case AF_INET: xfrm4_extract_header(skb); break; @@ -370,17 +367,12 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (!inner_mode) - return -EAFNOSUPPORT; - } - - switch (inner_mode->family) { - case AF_INET: + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: + case IPPROTO_BEETPH: skb->protocol = htons(ETH_P_IP); break; - case AF_INET6: + case IPPROTO_IPV6: skb->protocol = htons(ETH_P_IPV6); break; default: @@ -388,7 +380,7 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) break; } - return xfrm_inner_mode_encap_remove(x, inner_mode, skb); + return xfrm_inner_mode_encap_remove(x, skb); } /* Remove encapsulation header. @@ -434,17 +426,16 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } static int xfrm_inner_mode_input(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: return xfrm_prepare_input(x, skb); case XFRM_MODE_TRANSPORT: - if (inner_mode->family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_input(x, skb); - if (inner_mode->family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_input(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: @@ -462,7 +453,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); - const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; @@ -492,7 +482,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - family = x->outer_mode.family; + family = x->props.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -676,17 +666,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = &x->inner_mode; - - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - } - - if (xfrm_inner_mode_input(x, inner_mode, skb)) { + if (xfrm_inner_mode_input(x, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -701,7 +681,7 @@ resume: * transport mode so the outer address is identical. */ daddr = &x->id.daddr; - family = x->outer_mode.family; + family = x->props.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -732,7 +712,7 @@ resume: err = -EAFNOSUPPORT; rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + afinfo = xfrm_state_afinfo_get_rcu(x->props.family); if (likely(afinfo)) err = afinfo->transport_finish(skb, xfrm_gro || async); rcu_read_unlock(); -- GitLab From fc811d88fb4e4871f6814e3cb3e95858357280f5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Feb 2024 09:23:40 -0800 Subject: [PATCH 0111/1418] drm/msm: Wire up tlb ops commit 8c7bfd8262319fd3f127a5380f593ea76f1b88a2 upstream. The brute force iommu_flush_iotlb_all() was good enough for unmap, but in some cases a map operation could require removing a table pte entry to replace with a block entry. This also requires tlb invalidation. Missing this was resulting an obscure iova fault on what should be a valid buffer address. Thanks to Robin Murphy for helping me understand the cause of the fault. Cc: Robin Murphy Cc: stable@vger.kernel.org Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/578117/ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_iommu.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d12ba47b37c4f..0de3612135e96 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -194,11 +196,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -206,7 +230,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -282,6 +306,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; -- GitLab From 8c22b23a2778c9ab47f030b5eacf8d8c156115a7 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 21 Aug 2023 16:02:01 -0400 Subject: [PATCH 0112/1418] drm/prime: Support page array >= 4GB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b671cd3d456315f63171a670769356a196cf7fd0 upstream. Without unsigned long typecast, the size is passed in as zero if page array size >= 4GB, nr_pages >= 0x100000, then sg list converted will have the first and the last chunk lost. Signed-off-by: Philip Yang Acked-by: Felix Kuehling Reviewed-by: Christian König CC: stable@vger.kernel.org Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230821200201.24685-1-Philip.Yang@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_prime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index eb09e86044c6d..68a6d4b0ead75 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -828,7 +828,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); -- GitLab From e70123fdbe82e7e4478d1c22eb5e3a025b921917 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 5 Feb 2024 14:54:05 -0700 Subject: [PATCH 0113/1418] drm/amd/display: Increase frame-larger-than for all display_mode_vba files commit e63e35f0164c43fbc1adb481d6604f253b9f9667 upstream. After a recent change in LLVM, allmodconfig (which has CONFIG_KCSAN=y and CONFIG_WERROR=y enabled) has a few new instances of -Wframe-larger-than for the mode support and system configuration functions: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20v2.c:3393:6: error: stack frame size (2144) exceeds limit (2048) in 'dml20v2_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3393 | void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:3520:6: error: stack frame size (2192) exceeds limit (2048) in 'dml21_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3520 | void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20.c:3286:6: error: stack frame size (2128) exceeds limit (2048) in 'dml20_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3286 | void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. Without the sanitizers enabled, there are no warnings. This was the catalyst for commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") and that same change was made to dml in commit 5b750b22530f ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml") but the frame_warn_flag variable was not applied to all files. Do so now to clear up the warnings and make all these files consistent. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issue/1990 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ca7d240006213..6fdf87a6e240f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -60,11 +60,11 @@ ifdef CONFIG_DRM_AMD_DC_DCN CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) -- GitLab From 3ca5a3cdc0c105d976adbb8dc8469026bd07b097 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Tue, 30 Jan 2024 15:34:08 +0800 Subject: [PATCH 0114/1418] drm/amd/display: Preserve original aspect ratio in create stream commit deb110292180cd501f6fde2a0178d65fcbcabb0c upstream. [Why] The original picture aspect ratio in mode struct may have chance be overwritten with wrong aspect ratio data in create_stream_for_sink(). It will create a different VIC output and cause HDMI compliance test failed. [How] Preserve the original picture aspect ratio data during create the stream. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f02e509d5facb..a826c92933199 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6001,7 +6001,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); -- GitLab From 48a8ccccffbae10c91d31fc872db5c31aba07518 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Tue, 30 Jan 2024 23:35:51 -0800 Subject: [PATCH 0115/1418] hv_netvsc: Fix race condition between netvsc_probe and netvsc_remove commit e0526ec5360a48ad3ab2e26e802b0532302a7e11 upstream. In commit ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel"), napi_disable was getting called for all channels, including all subchannels without confirming if they are enabled or not. This caused hv_netvsc getting hung at napi_disable, when netvsc_probe() has finished running but nvdev->subchan_work has not started yet. netvsc_subchan_work() -> rndis_set_subchannel() has not created the sub-channels and because of that netvsc_sc_open() is not running. netvsc_remove() calls cancel_work_sync(&nvdev->subchan_work), for which netvsc_subchan_work did not run. netif_napi_add() sets the bit NAPI_STATE_SCHED because it ensures NAPI cannot be scheduled. Then netvsc_sc_open() -> napi_enable will clear the NAPIF_STATE_SCHED bit, so it can be scheduled. napi_disable() does the opposite. Now during netvsc_device_remove(), when napi_disable is called for those subchannels, napi_disable gets stuck on infinite msleep. This fix addresses this problem by ensuring that napi_disable() is not getting called for non-enabled NAPI struct. But netif_napi_del() is still necessary for these non-enabled NAPI struct for cleanup purpose. Call trace: [ 654.559417] task:modprobe state:D stack: 0 pid: 2321 ppid: 1091 flags:0x00004002 [ 654.568030] Call Trace: [ 654.571221] [ 654.573790] __schedule+0x2d6/0x960 [ 654.577733] schedule+0x69/0xf0 [ 654.581214] schedule_timeout+0x87/0x140 [ 654.585463] ? __bpf_trace_tick_stop+0x20/0x20 [ 654.590291] msleep+0x2d/0x40 [ 654.593625] napi_disable+0x2b/0x80 [ 654.597437] netvsc_device_remove+0x8a/0x1f0 [hv_netvsc] [ 654.603935] rndis_filter_device_remove+0x194/0x1c0 [hv_netvsc] [ 654.611101] ? do_wait_intr+0xb0/0xb0 [ 654.615753] netvsc_remove+0x7c/0x120 [hv_netvsc] [ 654.621675] vmbus_remove+0x27/0x40 [hv_vmbus] Cc: stable@vger.kernel.org Fixes: ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1706686551-28510-1-git-send-email-schakrabarti@linux.microsoft.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index da737d959e81c..3a834d4e1c842 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -740,7 +740,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } -- GitLab From ef3d50e884d4a765bbb8c73c29b21447196f1c7a Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 31 Jan 2024 14:09:55 +0000 Subject: [PATCH 0116/1418] ring-buffer: Clean ring_buffer_poll_wait() error return commit 66bbea9ed6446b8471d365a22734dc00556c4785 upstream. The return type for ring_buffer_poll_wait() is __poll_t. This is behind the scenes an unsigned where we can set event bits. In case of a non-allocated CPU, we do return instead -EINVAL (0xffffffea). Lucky us, this ends up setting few error bits (EPOLLERR | EPOLLHUP | EPOLLNVAL), so user-space at least is aware something went wrong. Nonetheless, this is an incorrect code. Replace that -EINVAL with a proper EPOLLERR to clean that output. As this doesn't change the behaviour, there's no need to treat this change as a bug fix. Link: https://lore.kernel.org/linux-trace-kernel/20240131140955.3322792-1-vdonnefort@google.com Cc: stable@vger.kernel.org Fixes: 6721cb6002262 ("ring-buffer: Do not poll non allocated cpu buffers") Signed-off-by: Vincent Donnefort Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1285e7fb597ee..e019a9278794f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1095,7 +1095,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; -- GitLab From c96ce4903b624528f6a21fa0b5ebcbc4db7e7849 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:09 +0200 Subject: [PATCH 0117/1418] nfp: flower: fix hardware offload for the transfer layer port commit 3a007b8009b5f8af021021b7a590a6da0dc4c6e0 upstream. The nfp driver will merge the tp source port and tp destination port into one dword which the offset must be zero to do hardware offload. However, the mangle action for the tp source port and tp destination port is separated for tc ct action. Modify the mangle action for the FLOW_ACT_MANGLE_HDR_TYPE_TCP and FLOW_ACT_MANGLE_HDR_TYPE_UDP to satisfy the nfp driver offload check for the tp port. The mangle action provides a 4B value for source, and a 4B value for the destination, but only 2B of each contains the useful information. For offload the 2B of each is combined into a single 4B word. Since the incoming mask for the source is '0xFFFF' the shift-left will throw away the 0xFFFF part. When this gets combined together in the offload it will clear the destination field. Fix this by setting the lower bits back to 0xFFFF, effectively doing a rotate-left operation on the mask. Fixes: 5cee92c6f57a ("nfp: flower: support hw offload for ct nat action") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-3-louis.peens@corigine.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../ethernet/netronome/nfp/flower/conntrack.c | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index f7492be452aed..7af03b45555dd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1379,10 +1379,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: -- GitLab From 5d89c48337c78a4cbe15ae0d6aa7ca043dae884e Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:58 -0500 Subject: [PATCH 0118/1418] serial: max310x: set default value when reading clock ready bit commit 0419373333c2f2024966d36261fd82a453281e80 upstream. If regmap_read() returns a non-zero value, the 'val' variable can be left uninitialized. Clear it before calling regmap_read() to make sure we properly detect the clock ready bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 338cb19dec23c..abae6e245eb29 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -641,7 +641,7 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val; + unsigned int val = 0; msleep(10); regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); if (!(val & MAX310X_STS_CLKREADY_BIT)) { -- GitLab From 7971a029eb23976016d7a9cd94f54e0c35aa26fb Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:59 -0500 Subject: [PATCH 0119/1418] serial: max310x: improve crystal stable clock detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 93cd256ab224c2519e7c4e5f58bb4f1ac2bf0965 upstream. Some people are seeing a warning similar to this when using a crystal: max310x 11-006c: clock is not stable yet The datasheet doesn't mention the maximum time to wait for the clock to be stable when using a crystal, and it seems that the 10ms delay in the driver is not always sufficient. Jan Kundrát reported that it took three tries (each separated by 10ms) to get a stable clock. Modify behavior to check stable clock ready bit multiple times (20), and waiting 10ms between each try. Note: the first draft of the driver originally used a 50ms delay, without checking the clock stable bit. Then a loop with 1000 retries was implemented, each time reading the clock stable bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Link: https://lore.kernel.org/all/20240110174015.6f20195fde08e5c9e64e5675@hugovil.com/raw Link: https://github.com/boundarydevices/linux/commit/e5dfe3e4a751392515d78051973190301a37ca9a Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index abae6e245eb29..2ce4cf27580e9 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Crystal-related definitions */ +#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ + /* MAX3107 specific */ #define MAX3107_REV_ID (0xa0) @@ -641,12 +645,19 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val = 0; - msleep(10); - regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); - if (!(val & MAX310X_STS_CLKREADY_BIT)) { + bool stable = false; + unsigned int try = 0, val = 0; + + do { + msleep(MAX310X_XTAL_WAIT_DELAY_MS); + regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); + + if (val & MAX310X_STS_CLKREADY_BIT) + stable = true; + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) dev_warn(dev, "clock is not stable yet\n"); - } } return bestfreq; -- GitLab From 0046dd2e9ff6d5f12a273046880cad39313bfcf4 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:00 -0500 Subject: [PATCH 0120/1418] serial: max310x: fail probe if clock crystal is unstable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8afa6c6decea37e7cb473d2c60473f37f46cea35 upstream. A stable clock is really required in order to use this UART, so log an error message and bail out if the chip reports that the clock is not stable. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 2ce4cf27580e9..e91a2a59d9f00 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -587,7 +587,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -657,7 +657,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); if (!stable) - dev_warn(dev, "clock is not stable yet\n"); + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1285,7 +1286,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; for (i = 0; i < devtype->nr; i++) @@ -1363,6 +1364,11 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { -- GitLab From 5e2f407646faa2aa45c853d66a07cf4c8afdb59c Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:01 -0500 Subject: [PATCH 0121/1418] serial: max310x: prevent infinite while() loop in port startup commit b35f8dbbce818b02c730dc85133dc7754266e084 upstream. If there is a problem after resetting a port, the do/while() loop that checks the default value of DIVLSB register may run forever and spam the I2C bus. Add a delay before each read of DIVLSB, and a maximum number of tries to prevent that situation from happening. Also fail probe if port reset is unsuccessful. Fixes: 10d8b34a4217 ("serial: max310x: Driver rework") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index e91a2a59d9f00..163a89f84c9c2 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + /* Crystal-related definitions */ #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ @@ -1349,6 +1353,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1357,8 +1364,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } -- GitLab From 76d3ad7d0264d8ffe500ca9fa7f8c4fec8f1f9bd Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Fri, 2 Feb 2024 21:13:16 +0530 Subject: [PATCH 0122/1418] powerpc/64: Set task pt_regs->link to the LR value on scv entry commit aad98efd0b121f63a2e1c221dcb4d4850128c697 upstream. Nysal reported that userspace backtraces are missing in offcputime bcc tool. As an example: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write - python (9107) 8 write - sudo (9105) 9 mmap - python (9107) 16 clock_nanosleep - multipathd (697) 3001604 The offcputime bcc tool attaches a bpf program to a kprobe on finish_task_switch(), which is usually hit on a syscall from userspace. With the switch to system call vectored, we started setting pt_regs->link to zero. This is because system call vectored behaves like a function call with LR pointing to the system call return address, and with no modification to SRR0/SRR1. The LR value does indicate our next instruction, so it is being saved as pt_regs->nip, and pt_regs->link is being set to zero. This is not a problem by itself, but BPF uses perf callchain infrastructure for capturing stack traces, and that stores LR as the second entry in the stack trace. perf has code to cope with the second entry being zero, and skips over it. However, generic userspace unwinders assume that a zero entry indicates end of the stack trace, resulting in a truncated userspace stack trace. Rather than fixing all userspace unwinders to ignore/skip past the second entry, store the real LR value in pt_regs->link so that there continues to be a valid, though duplicate entry in the stack trace. With this change: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write write [unknown] [unknown] [unknown] [unknown] [unknown] PyObject_VectorcallMethod [unknown] [unknown] PyObject_CallOneArg PyFile_WriteObject PyFile_WriteString [unknown] [unknown] PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 7 write write [unknown] sudo_ev_loop_v1 sudo_ev_dispatch_v1 [unknown] [unknown] [unknown] [unknown] __libc_start_main - sudo (1291) 7 syscall syscall bpf_open_perf_buffer_opts [unknown] [unknown] [unknown] [unknown] _PyObject_MakeTpCall PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 11 clock_nanosleep clock_nanosleep nanosleep sleep [unknown] [unknown] __clone - multipathd (698) 3001661 Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org Reported-by: "Nysal Jan K.A" Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20240202154316.395276-1-naveen@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/interrupt_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc8393..26c151e2a7942 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr -- GitLab From 42422f8f8c52af49a5caec4919584e2e8f87997b Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 7 Feb 2024 10:27:58 +0100 Subject: [PATCH 0123/1418] powerpc/cputable: Add missing PPC_FEATURE_BOOKE on PPC64 Book-E commit eb6d871f4ba49ac8d0537e051fe983a3a4027f61 upstream. Commit e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") moved the cpu_specs to separate header files. Previously PPC_FEATURE_BOOKE was enabled by CONFIG_PPC_BOOK3E_64. The definition in cpu_specs_e500mc.h for PPC64 no longer enables PPC_FEATURE_BOOKE. This breaks user space reading the ELF hwcaps and expect PPC_FEATURE_BOOKE. Debugging an application with gdb is no longer working on e5500/e6500 because the 64-bit detection relies on PPC_FEATURE_BOOKE for Book-E. Fixes: e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: David Engraf Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20240207092758.1058893-1-david.engraf@sysgo.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cpu_specs_e500mc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109f831..2ae8e9a7b461c 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) -- GitLab From c194adaa8a4c404ba5676ac90ff607b3d7b3120d Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Tue, 13 Feb 2024 10:56:35 +0530 Subject: [PATCH 0124/1418] powerpc/pseries: fix accuracy of stolen time commit cbecc9fcbbec60136b0180ba0609c829afed5c81 upstream. powerVM hypervisor updates the VPA fields with stolen time data. It currently reports enqueue_dispatch_tb and ready_enqueue_tb for this purpose. In linux these two fields are used to report the stolen time. The VPA fields are updated at the TB frequency. On powerPC its mostly set at 512Mhz. Hence this needs a conversion to ns when reporting it back as rest of the kernel timings are in ns. This conversion is already handled in tb_to_ns function. So use that function to report accurate stolen time. Observed this issue and used an Capped Shared Processor LPAR(SPLPAR) to simplify the experiments. In all these cases, 100% VP Load is run using stress-ng workload. Values of stolen time is in percentages as reported by mpstat. With the patch values are close to expected. 6.8.rc1 +Patch 12EC/12VP 0.0 0.0 12EC/24VP 25.7 50.2 12EC/36VP 37.3 69.2 12EC/48VP 38.5 78.3 Fixes: 0e8a63132800 ("powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Shrikanth Hegde Reviewed-by: Nicholas Piggin Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20240213052635.231597-1-sshegde@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 541199c6a587d..5186d65d772e2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -660,8 +660,12 @@ u64 pseries_paravirt_steal_clock(int cpu) { struct lppaca *lppaca = &lppaca_of(cpu); - return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + - be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); + /* + * VPA steal time counters are reported at TB frequency. Hence do a + * conversion to ns before returning + */ + return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb))); } #endif -- GitLab From 8b4025679e563b407d322fcd13eb287179e1fa32 Mon Sep 17 00:00:00 2001 From: Aleksander Mazur Date: Tue, 23 Jan 2024 14:43:00 +0100 Subject: [PATCH 0125/1418] x86/Kconfig: Transmeta Crusoe is CPU family 5, not 6 commit f6a1892585cd19e63c4ef2334e26cd536d5b678d upstream. The kernel built with MCRUSOE is unbootable on Transmeta Crusoe. It shows the following error message: This kernel requires an i686 CPU, but only detected an i586 CPU. Unable to boot - please use a kernel appropriate for your CPU. Remove MCRUSOE from the condition introduced in commit in Fixes, effectively changing X86_MINIMUM_CPU_FAMILY back to 5 on that machine, which matches the CPU family given by CPUID. [ bp: Massage commit message. ] Fixes: 25d76ac88821 ("x86/Kconfig: Explicitly enumerate i686-class CPUs in Kconfig") Signed-off-by: Aleksander Mazur Signed-off-by: Borislav Petkov (AMD) Acked-by: H. Peter Anvin Cc: Link: https://lore.kernel.org/r/20240123134309.1117782-1-deweloper@wp.pl Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d7..ce5ed2c2db0c9 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -375,7 +375,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) default "5" if X86_32 && X86_CMPXCHG64 default "4" -- GitLab From 627339cccdc9166792ecf96bc3c9f711a60ce996 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 29 Jan 2024 22:36:03 -0800 Subject: [PATCH 0126/1418] x86/fpu: Stop relying on userspace for info to fault in xsave buffer commit d877550eaf2dc9090d782864c96939397a3c6835 upstream. Before this change, the expected size of the user space buffer was taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed from user-space, so it is possible construct a sigreturn frame where: * fx_sw->xstate_size is smaller than the size required by valid bits in fx_sw->xfeatures. * user-space unmaps parts of the sigrame fpu buffer so that not all of the buffer required by xrstor is accessible. In this case, xrstor tries to restore and accesses the unmapped area which results in a fault. But fault_in_readable succeeds because buf + fx_sw->xstate_size is within the still mapped area, so it goes back and tries xrstor again. It will spin in this loop forever. Instead, fault in the maximum size which can be touched by XRSTOR (taken from fpstate->user_size). [ dhansen: tweak subject / changelog ] Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path") Reported-by: Konstantin Bogomolov Suggested-by: Thomas Gleixner Signed-off-by: Andrei Vagin Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 558076dbde5bf..247f2225aa9f3 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; + /* Restore enabled features only. */ + xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ @@ -309,7 +310,7 @@ retry: if (ret != X86_TRAP_PF) return false; - if (!fault_in_readable(buf, size)) + if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } @@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; - unsigned int state_size; u64 user_xfeatures = 0; if (use_xsave()) { @@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return false; fx_only = !fx_sw_user.magic1; - state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* -- GitLab From 3863ca052216d893d21ac42b75c2db8a0bd49689 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Tue, 23 Jan 2024 22:12:20 +0000 Subject: [PATCH 0127/1418] KVM: x86/pmu: Fix type length error when reading pmu->fixed_ctr_ctrl commit 05519c86d6997cfb9bb6c82ce1595d1015b718dc upstream. Use a u64 instead of a u8 when taking a snapshot of pmu->fixed_ctr_ctrl when reprogramming fixed counters, as truncating the value results in KVM thinking fixed counter 2 is already disabled (the bug also affects fixed counters 3+, but KVM doesn't yet support those). As a result, if the guest disables fixed counter 2, KVM will get a false negative and fail to reprogram/disable emulation of the counter, which can leads to incorrect counts and spurious PMIs in the guest. Fixes: 76d287b2342e ("KVM: x86/pmu: Drop "u8 ctrl, int idx" for reprogram_fixed_counter()") Cc: stable@vger.kernel.org Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20240123221220.3911317-1-mizhang@google.com [sean: rewrite changelog to call out the effects of the bug] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9a75a0d5deae1..220cdbe1e286e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -38,7 +38,7 @@ static int fixed_pmc_events[] = {1, 0, 7}; static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { struct kvm_pmc *pmc; - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; + u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; int i; pmu->fixed_ctr_ctrl = data; -- GitLab From aedcefae6c18f6d7b2c75c1a40d5731004929621 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 26 Jan 2024 10:48:41 -0600 Subject: [PATCH 0128/1418] x86/mm/ident_map: Use gbpages only where full GB page should be mapped. commit d794734c9bbfe22f86686dc2909c25f5ffe1a572 upstream. When ident_pud_init() uses only gbpages to create identity maps, large ranges of addresses not actually requested can be included in the resulting table; a 4K request will map a full GB. On UV systems, this ends up including regions that will cause hardware to halt the system if accessed (these are marked "reserved" by BIOS). Even processor speculation into these regions is enough to trigger the system halt. Only use gbpages when map creation requests include the full GB page of space. Fall back to using smaller 2M pages when only portions of a GB page are included in the request. No attempt is made to coalesce mapping requests. If a request requires a map entry at the 2M (pmd) level, subsequent mapping requests within the same 1G region will also be at the pmd level, even if adjacent or overlapping such requests could have been combined to map a full gbpage. Existing usage starts with larger regions and then adds smaller regions, so this should not have any great consequence. [ dhansen: fix up comment formatting, simplifty changelog ] Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240126164841.170866-1-steve.wahl%40hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/ident_map.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f4a72..f50cc210a9818 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - if (info->direct_gbpages) { - pud_t pudval; + /* if this is already a gbpage, this portion is already mapped */ + if (pud_large(*pud)) + continue; + + /* Is using a gbpage allowed? */ + use_gbpage = info->direct_gbpages; - if (pud_present(*pud)) - continue; + /* Don't use gbpage if it maps more than the requested region. */ + /* at the begining: */ + use_gbpage &= ((addr & ~PUD_MASK) == 0); + /* ... or at the end: */ + use_gbpage &= ((next & ~PUD_MASK) == 0); + + /* Never overwrite existing mappings */ + use_gbpage &= !pud_present(*pud); + + if (use_gbpage) { + pud_t pudval; - addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; -- GitLab From eae748df18ed25fc155964a51a1533eea29baae7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Feb 2024 08:23:05 -0700 Subject: [PATCH 0129/1418] io_uring/net: fix multishot accept overflow handling commit a37ee9e117ef73bbc2f5c0b31911afd52d229861 upstream. If we hit CQ ring overflow when attempting to post a multishot accept completion, we don't properly save the result or return code. This results in losing the accepted fd value. Instead, we return the result from the poll operation that triggered the accept retry. This is generally POLLIN|POLLPRI|POLLRDNORM|POLLRDBAND which is 0xc3, or 195, which looks like a valid file descriptor, but it really has no connection to that. Handle this like we do for other multishot completions - assign the result, and return IOU_STOP_MULTISHOT to cancel any further completions from this request when overflow is hit. This preserves the result, as we should, and tells the application that the request needs to be re-armed. Cc: stable@vger.kernel.org Fixes: 515e26961295 ("io_uring: revert "io_uring fix multishot accept ordering"") Link: https://github.com/axboe/liburing/issues/1062 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 618ab186fe036..c062ce66af12c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1326,7 +1326,7 @@ retry: * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) - ret = IOU_ISSUE_SKIP_COMPLETE; + return IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) @@ -1350,7 +1350,8 @@ retry: if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) goto retry; - return -ECANCELED; + io_req_set_res(req, ret, 0); + return IOU_STOP_MULTISHOT; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- GitLab From a943c7fbdfebb35a0f58fff94c018ee0e7b3482c Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 6 Feb 2024 09:39:12 +0100 Subject: [PATCH 0130/1418] mmc: slot-gpio: Allow non-sleeping GPIO ro commit cc9432c4fb159a3913e0ce3173b8218cd5bad2e0 upstream. This change uses the appropriate _cansleep or non-sleeping API for reading GPIO read-only state. This allows users with GPIOs that never sleepbeing called in atomic context. Implement the same mechanism as in commit 52af318c93e97 ("mmc: Allow non-sleeping GPIO cd"). Signed-off-by: Alexander Stein Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240206083912.2543142-1-alexander.stein@ew.tq-group.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/slot-gpio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index dd2a4b6ab6adb..e3c69c6b85a6c 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -62,11 +62,15 @@ int mmc_gpio_alloc(struct mmc_host *host) int mmc_gpio_get_ro(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; + int cansleep; if (!ctx || !ctx->ro_gpio) return -ENOSYS; - return gpiod_get_value_cansleep(ctx->ro_gpio); + cansleep = gpiod_cansleep(ctx->ro_gpio); + return cansleep ? + gpiod_get_value_cansleep(ctx->ro_gpio) : + gpiod_get_value(ctx->ro_gpio); } EXPORT_SYMBOL(mmc_gpio_get_ro); -- GitLab From 53e8abc14e579c87c91d10db88d9c34b0d295361 Mon Sep 17 00:00:00 2001 From: Eniac Zhang Date: Thu, 15 Feb 2024 15:49:22 +0000 Subject: [PATCH 0131/1418] ALSA: hda/realtek: fix mute/micmute LED For HP mt645 commit 32f03f4002c5df837fb920eb23fcd2f4af9b0b23 upstream. The HP mt645 G7 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang Signed-off-by: Alexandru Gagniuc Cc: Link: https://lore.kernel.org/r/20240215154922.778394-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 153a9605d3a97..92a656fb53212 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9690,6 +9690,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -9697,6 +9698,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), -- GitLab From 35076e3fb4b8cbe9f24e1b33c4985cbd5ac59c16 Mon Sep 17 00:00:00 2001 From: bo liu Date: Mon, 5 Feb 2024 09:38:02 +0800 Subject: [PATCH 0132/1418] ALSA: hda/conexant: Add quirk for SWS JS201D commit 4639c5021029d49fd2f97fa8d74731f167f98919 upstream. The SWS JS201D need a different pinconfig from windows driver. Add a quirk to use a specific pinconfig to SWS JS201D. Signed-off-by: bo liu Cc: Link: https://lore.kernel.org/r/20240205013802.51907-1-bo.liu@senarytech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e8819e8a98763..e8209178d87bb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -344,6 +344,7 @@ enum { CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, + CXT_PINCFG_SWS_JS201D, }; /* for hda_fixup_thinkpad_acpi() */ @@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = { {} }; +/* SuoWoSi/South-holding JS201D with sn6140 */ +static const struct hda_pintbl cxt_pincfg_sws_js201d[] = { + { 0x16, 0x03211040 }, /* hp out */ + { 0x17, 0x91170110 }, /* SPK/Class_D */ + { 0x18, 0x95a70130 }, /* Internal mic */ + { 0x19, 0x03a11020 }, /* Headset Mic */ + { 0x1a, 0x40f001f0 }, /* Not used */ + { 0x21, 0x40f001f0 }, /* Not used */ + {} +}; + static const struct hda_fixup cxt_fixups[] = { [CXT_PINCFG_LENOVO_X200] = { .type = HDA_FIXUP_PINS, @@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = { .chained = true, .chain_id = CXT_FIXUP_HEADSET_MIC, }, + [CXT_PINCFG_SWS_JS201D] = { + .type = HDA_FIXUP_PINS, + .v.pins = cxt_pincfg_sws_js201d, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), @@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, {} }; -- GitLab From 9c9c68d64fd3284f7097ed6ae057c8441f39fcd3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 24 Jan 2024 21:19:36 +0900 Subject: [PATCH 0133/1418] nilfs2: fix data corruption in dsync block recovery for small block sizes commit 67b8bcbaed4777871bb0dcc888fb02a614a98ab1 upstream. The helper function nilfs_recovery_copy_block() of nilfs_recovery_dsync_blocks(), which recovers data from logs created by data sync writes during a mount after an unclean shutdown, incorrectly calculates the on-page offset when copying repair data to the file's page cache. In environments where the block size is smaller than the page size, this flaw can cause data corruption and leak uninitialized memory bytes during the recovery process. Fix these issues by correcting this byte offset calculation on the page. Link: https://lkml.kernel.org/r/20240124121936.10575-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/recovery.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 0955b657938ff..a9b8d77c8c1d5 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct page *page) { struct buffer_head *bh_org; + size_t from = pos & ~PAGE_MASK; void *kaddr; bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); @@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, return -EIO; kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); + memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); kunmap_atomic(kaddr); brelse(bh_org); return 0; @@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, page); if (unlikely(err)) goto failed_page; -- GitLab From 8494ba2c9ea00a54d5b50e69b22c55a8958bce32 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 31 Jan 2024 23:56:57 +0900 Subject: [PATCH 0134/1418] nilfs2: fix hang in nilfs_lookup_dirty_data_buffers() commit 38296afe3c6ee07319e01bb249aa4bb47c07b534 upstream. Syzbot reported a hang issue in migrate_pages_batch() called by mbind() and nilfs_lookup_dirty_data_buffers() called in the log writer of nilfs2. While migrate_pages_batch() locks a folio and waits for the writeback to complete, the log writer thread that should bring the writeback to completion picks up the folio being written back in nilfs_lookup_dirty_data_buffers() that it calls for subsequent log creation and was trying to lock the folio. Thus causing a deadlock. In the first place, it is unexpected that folios/pages in the middle of writeback will be updated and become dirty. Nilfs2 adds a checksum to verify the validity of the log being written and uses it for recovery at mount, so data changes during writeback are suppressed. Since this is broken, an unclean shutdown could potentially cause recovery to fail. Investigation revealed that the root cause is that the wait for writeback completion in nilfs_page_mkwrite() is conditional, and if the backing device does not require stable writes, data may be modified without waiting. Fix these issues by making nilfs_page_mkwrite() wait for writeback to finish regardless of the stable write requirement of the backing device. Link: https://lkml.kernel.org/r/20240131145657.4209-1-konishi.ryusuke@gmail.com Fixes: 1d1d1a767206 ("mm: only enforce stable page writes if the backing device requires it") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ee2ae68da3b22d04cd8d@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/00000000000047d819061004ad6c@google.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a265d391ffe92..822e8d95d31ef 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_for_stable_page(page); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); -- GitLab From 8731fe001a60581794ed9cf65da8cd304846a6fb Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 25 Jan 2024 17:12:53 -0600 Subject: [PATCH 0135/1418] crypto: ccp - Fix null pointer dereference in __sev_platform_shutdown_locked commit ccb88e9549e7cfd8bcd511c538f437e20026e983 upstream. The SEV platform device can be shutdown with a null psp_master, e.g., using DEBUG_TEST_DRIVER_REMOVE. Found using KASAN: [ 137.148210] ccp 0000:23:00.1: enabling device (0000 -> 0002) [ 137.162647] ccp 0000:23:00.1: no command queues available [ 137.170598] ccp 0000:23:00.1: sev enabled [ 137.174645] ccp 0000:23:00.1: psp enabled [ 137.178890] general protection fault, probably for non-canonical address 0xdffffc000000001e: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN NOPTI [ 137.182693] KASAN: null-ptr-deref in range [0x00000000000000f0-0x00000000000000f7] [ 137.182693] CPU: 93 PID: 1 Comm: swapper/0 Not tainted 6.8.0-rc1+ #311 [ 137.182693] RIP: 0010:__sev_platform_shutdown_locked+0x51/0x180 [ 137.182693] Code: 08 80 3c 08 00 0f 85 0e 01 00 00 48 8b 1d 67 b6 01 08 48 b8 00 00 00 00 00 fc ff df 48 8d bb f0 00 00 00 48 89 f9 48 c1 e9 03 <80> 3c 01 00 0f 85 fe 00 00 00 48 8b 9b f0 00 00 00 48 85 db 74 2c [ 137.182693] RSP: 0018:ffffc900000cf9b0 EFLAGS: 00010216 [ 137.182693] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 000000000000001e [ 137.182693] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 00000000000000f0 [ 137.182693] RBP: ffffc900000cf9c8 R08: 0000000000000000 R09: fffffbfff58f5a66 [ 137.182693] R10: ffffc900000cf9c8 R11: ffffffffac7ad32f R12: ffff8881e5052c28 [ 137.182693] R13: ffff8881e5052c28 R14: ffff8881758e43e8 R15: ffffffffac64abf8 [ 137.182693] FS: 0000000000000000(0000) GS:ffff889de7000000(0000) knlGS:0000000000000000 [ 137.182693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 137.182693] CR2: 0000000000000000 CR3: 0000001cf7c7e000 CR4: 0000000000350ef0 [ 137.182693] Call Trace: [ 137.182693] [ 137.182693] ? show_regs+0x6c/0x80 [ 137.182693] ? __die_body+0x24/0x70 [ 137.182693] ? die_addr+0x4b/0x80 [ 137.182693] ? exc_general_protection+0x126/0x230 [ 137.182693] ? asm_exc_general_protection+0x2b/0x30 [ 137.182693] ? __sev_platform_shutdown_locked+0x51/0x180 [ 137.182693] sev_firmware_shutdown.isra.0+0x1e/0x80 [ 137.182693] sev_dev_destroy+0x49/0x100 [ 137.182693] psp_dev_destroy+0x47/0xb0 [ 137.182693] sp_destroy+0xbb/0x240 [ 137.182693] sp_pci_remove+0x45/0x60 [ 137.182693] pci_device_remove+0xaa/0x1d0 [ 137.182693] device_remove+0xc7/0x170 [ 137.182693] really_probe+0x374/0xbe0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] __driver_probe_device+0x199/0x460 [ 137.182693] driver_probe_device+0x4e/0xd0 [ 137.182693] __driver_attach+0x191/0x3d0 [ 137.182693] ? __pfx___driver_attach+0x10/0x10 [ 137.182693] bus_for_each_dev+0x100/0x190 [ 137.182693] ? __pfx_bus_for_each_dev+0x10/0x10 [ 137.182693] ? __kasan_check_read+0x15/0x20 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? _raw_spin_unlock+0x27/0x50 [ 137.182693] driver_attach+0x41/0x60 [ 137.182693] bus_add_driver+0x2a8/0x580 [ 137.182693] driver_register+0x141/0x480 [ 137.182693] __pci_register_driver+0x1d6/0x2a0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? esrt_sysfs_init+0x1cd/0x5d0 [ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 [ 137.182693] sp_pci_init+0x22/0x30 [ 137.182693] sp_mod_init+0x14/0x30 [ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 [ 137.182693] do_one_initcall+0xd1/0x470 [ 137.182693] ? __pfx_do_one_initcall+0x10/0x10 [ 137.182693] ? parameq+0x80/0xf0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? __kmalloc+0x3b0/0x4e0 [ 137.182693] ? kernel_init_freeable+0x92d/0x1050 [ 137.182693] ? kasan_populate_vmalloc_pte+0x171/0x190 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] kernel_init_freeable+0xa64/0x1050 [ 137.182693] ? __pfx_kernel_init+0x10/0x10 [ 137.182693] kernel_init+0x24/0x160 [ 137.182693] ? __switch_to_asm+0x3e/0x70 [ 137.182693] ret_from_fork+0x40/0x80 [ 137.182693] ? __pfx_kernel_init+0x10/0x10 [ 137.182693] ret_from_fork_asm+0x1b/0x30 [ 137.182693] [ 137.182693] Modules linked in: [ 137.538483] ---[ end trace 0000000000000000 ]--- Fixes: 1b05ece0c931 ("crypto: ccp - During shutdown, check SEV data pointer before using") Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Signed-off-by: Kim Phillips Reviewed-by: Liam Merwick Acked-by: John Allen Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/sev-dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index b8e02c3a19610..bbfb0f288dc35 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -515,10 +515,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init); static int __sev_platform_shutdown_locked(int *error) { - struct sev_device *sev = psp_master->sev_data; + struct psp_device *psp = psp_master; + struct sev_device *sev; int ret; - if (!sev || sev->state == SEV_STATE_UNINIT) + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + if (sev->state == SEV_STATE_UNINIT) return 0; ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); -- GitLab From 57b8478c103ab9992c2fbddc0c6db83f05587ab8 Mon Sep 17 00:00:00 2001 From: Daniel Basilio Date: Fri, 2 Feb 2024 13:37:17 +0200 Subject: [PATCH 0136/1418] nfp: use correct macro for LengthSelect in BAR config commit b3d4f7f2288901ed2392695919b3c0e24c1b4084 upstream. The 1st and 2nd expansion BAR configuration registers are configured, when the driver starts up, in variables 'barcfg_msix_general' and 'barcfg_msix_xpb', respectively. The 'LengthSelect' field is ORed in from bit 0, which is incorrect. The 'LengthSelect' field should start from bit 27. This has largely gone un-noticed because NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT happens to be 0. Fixes: 4cb584e0ee7d ("nfp: add CPP access core") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Daniel Basilio Signed-off-by: Louis Peens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 33b4c28563162..3f10c5365c80e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) const u32 barcfg_msix_general = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); const u32 barcfg_msix_xpb = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) | NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( NFP_CPP_TARGET_ISLAND_XPB); const u32 barcfg_explicit[4] = { -- GitLab From 685fc1711cdaf879c352a6a1d8787415c000b831 Mon Sep 17 00:00:00 2001 From: Daniel de Villiers Date: Fri, 2 Feb 2024 13:37:18 +0200 Subject: [PATCH 0137/1418] nfp: flower: prevent re-adding mac index for bonded port commit 1a1c13303ff6d64e6f718dc8aa614e580ca8d9b4 upstream. When physical ports are reset (either through link failure or manually toggled down and up again) that are slaved to a Linux bond with a tunnel endpoint IP address on the bond device, not all tunnel packets arriving on the bond port are decapped as expected. The bond dev assigns the same MAC address to itself and each of its slaves. When toggling a slave device, the same MAC address is therefore offloaded to the NFP multiple times with different indexes. The issue only occurs when re-adding the shared mac. The nfp_tunnel_add_shared_mac() function has a conditional check early on that checks if a mac entry already exists and if that mac entry is global: (entry && nfp_tunnel_is_mac_idx_global(entry->index)). In the case of a bonded device (For example br-ex), the mac index is obtained, and no new index is assigned. We therefore modify the conditional in nfp_tunnel_add_shared_mac() to check if the port belongs to the LAG along with the existing checks to prevent a new global mac index from being re-assigned to the slave port. Fixes: 20cce8865098 ("nfp: flower: enable MAC address sharing for offloadable devs") CC: stable@vger.kernel.org # 5.1+ Signed-off-by: Daniel de Villiers Signed-off-by: Louis Peens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 52f67157bd0f7..a3c52c91a575d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -980,7 +980,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); - if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) { if (entry->bridge_count || !nfp_flower_is_supported_bridge(netdev)) { nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, -- GitLab From 6c84dbe8f8faf1c96a4409accfa8f0337ef2d5d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Jan 2024 09:51:09 +0100 Subject: [PATCH 0138/1418] wifi: cfg80211: fix wiphy delayed work queueing commit b743287d7a0007493f5cada34ed2085d475050b4 upstream. When a wiphy work is queued with timer, and then again without a delay, it's started immediately but *also* started again after the timer expires. This can lead, for example, to warnings in mac80211's offchannel code as reported by Jouni. Running the same work twice isn't expected, of course. Fix this by deleting the timer at this point, when queuing immediately due to delay=0. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Link: https://msgid.link/20240125095108.2feb0eaaa446.I4617f3210ed0e7f252290d5970dac6a876aa595b@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 8809e668ed912..3fcddc8687ed4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1671,6 +1671,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, unsigned long delay) { if (!delay) { + del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } -- GitLab From 783912cbce88be94ac2f9d4c256bd61e708814ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:49:10 +0100 Subject: [PATCH 0139/1418] wifi: mac80211: reload info pointer in ieee80211_tx_dequeue() commit c98d8836b817d11fdff4ca7749cbbe04ff7f0c64 upstream. This pointer can change here since the SKB can change, so we actually later open-coded IEEE80211_SKB_CB() again. Reload the pointer where needed, so the monitor-mode case using it gets fixed, and then use info-> later as well. Cc: stable@vger.kernel.org Fixes: 531682159092 ("mac80211: fix VLAN handling with TXQs") Link: https://msgid.link/20240131164910.b54c28d583bc.I29450cec84ea6773cff5d9c16ff92b836c331471@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2db103a56a28f..322a035f75929 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -3838,6 +3838,7 @@ begin: goto begin; skb = __skb_dequeue(&tx.skbs); + info = IEEE80211_SKB_CB(skb); if (!skb_queue_empty(&tx.skbs)) { spin_lock_bh(&fq->lock); @@ -3882,7 +3883,7 @@ begin: } encap_out: - IEEE80211_SKB_CB(skb)->control.vif = vif; + info->control.vif = vif; if (tx.sta && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { -- GitLab From 659311f593188a3d6c6adcb7d9316993f9431a91 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Fri, 9 Feb 2024 17:24:49 -0800 Subject: [PATCH 0140/1418] irqchip/irq-brcmstb-l2: Add write memory barrier before exit commit b0344d6854d25a8b3b901c778b1728885dd99007 upstream. It was observed on Broadcom devices that use GIC v3 architecture L1 interrupt controllers as the parent of brcmstb-l2 interrupt controllers that the deactivation of the parent interrupt could happen before the brcmstb-l2 deasserted its output. This would lead the GIC to reactivate the interrupt only to find that no L2 interrupt was pending. The result was a spurious interrupt invoking handle_bad_irq() with its associated messaging. While this did not create a functional problem it is a waste of cycles. The hazard exists because the memory mapped bus writes to the brcmstb-l2 registers are buffered and the GIC v3 architecture uses a very efficient system register write to deactivate the interrupt. Add a write memory barrier prior to invoking chained_irq_exit() to introduce a dsb(st) on those systems to ensure the system register write cannot be executed until the memory mapped writes are visible to the system. [ florian: Added Fixes tag ] Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Signed-off-by: Thomas Gleixner Acked-by: Florian Fainelli Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240210012449.3009125-1-florian.fainelli@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-brcmstb-l2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 091b0fe7e3242..5d4421f75b43a 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -2,7 +2,7 @@ /* * Generic Broadcom Set Top Box Level 2 Interrupt controller driver * - * Copyright (C) 2014-2017 Broadcom + * Copyright (C) 2014-2024 Broadcom */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -113,6 +113,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) generic_handle_domain_irq(b->domain, irq); } while (status); out: + /* Don't ack parent before all device writes are done */ + wmb(); + chained_irq_exit(chip, desc); } -- GitLab From ce2b826582f5e8f802ca6314b7a1ae5c4a27fe08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2024 10:12:06 +0000 Subject: [PATCH 0141/1418] irqchip/gic-v3-its: Fix GICv4.1 VPE affinity update commit af9acbfc2c4b72c378d0b9a2ee023ed01055d3e2 upstream. When updating the affinity of a VPE, the VMOVP command is currently skipped if the two CPUs are part of the same VPE affinity. But this is wrong, as the doorbell corresponding to this VPE is still delivered on the 'old' CPU, which screws up the balancing. Furthermore, offlining that 'old' CPU results in doorbell interrupts generated for this VPE being discarded. The harsh reality is that VMOVP cannot be elided when a set_affinity() request occurs. It needs to be obeyed, and if an optimisation is to be made, it is at the point where the affinity change request is made (such as in KVM). Drop the VMOVP elision altogether, and only use the vpe_table_mask to try and stay within the same ITS affinity group if at all possible. Fixes: dd3f050a216e (irqchip/gic-v4.1: Implement the v4.1 flavour of VMOVP) Reported-by: Kunkun Jiang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240213101206.2137483-4-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 8956881503d9a..b83b39e93e1a9 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3805,8 +3805,9 @@ static int its_vpe_set_affinity(struct irq_data *d, bool force) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - int from, cpu = cpumask_first(mask_val); + struct cpumask common, *table_mask; unsigned long flags; + int from, cpu; /* * Changing affinity is mega expensive, so let's be as lazy as @@ -3822,19 +3823,22 @@ static int its_vpe_set_affinity(struct irq_data *d, * taken on any vLPI handling path that evaluates vpe->col_idx. */ from = vpe_to_cpuid_lock(vpe, &flags); - if (from == cpu) - goto out; - - vpe->col_idx = cpu; + table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; /* - * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD - * is sharing its VPE table with the current one. + * If we are offered another CPU in the same GICv4.1 ITS + * affinity, pick this one. Otherwise, any CPU will do. */ - if (gic_data_rdist_cpu(cpu)->vpe_table_mask && - cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) + if (table_mask && cpumask_and(&common, mask_val, table_mask)) + cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common); + else + cpu = cpumask_first(mask_val); + + if (from == cpu) goto out; + vpe->col_idx = cpu; + its_send_vmovp(vpe); its_vpe_db_proxy_move(vpe, from, cpu); -- GitLab From 09fad23a1a3249088e8f840685d10ae1650519a9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 8 Feb 2024 17:26:59 +0900 Subject: [PATCH 0142/1418] zonefs: Improve error handling commit 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 upstream. Write error handling is racy and can sometime lead to the error recovery path wrongly changing the inode size of a sequential zone file to an incorrect value which results in garbage data being readable at the end of a file. There are 2 problems: 1) zonefs_file_dio_write() updates a zone file write pointer offset after issuing a direct IO with iomap_dio_rw(). This update is done only if the IO succeed for synchronous direct writes. However, for asynchronous direct writes, the update is done without waiting for the IO completion so that the next asynchronous IO can be immediately issued. However, if an asynchronous IO completes with a failure right before the i_truncate_mutex lock protecting the update, the update may change the value of the inode write pointer offset that was corrected by the error path (zonefs_io_error() function). 2) zonefs_io_error() is called when a read or write error occurs. This function executes a report zone operation using the callback function zonefs_io_error_cb(), which does all the error recovery handling based on the current zone condition, write pointer position and according to the mount options being used. However, depending on the zoned device being used, a report zone callback may be executed in a context that is different from the context of __zonefs_io_error(). As a result, zonefs_io_error_cb() may be executed without the inode truncate mutex lock held, which can lead to invalid error processing. Fix both problems as follows: - Problem 1: Perform the inode write pointer offset update before a direct write is issued with iomap_dio_rw(). This is safe to do as partial direct writes are not supported (IOMAP_DIO_PARTIAL is not set) and any failed IO will trigger the execution of zonefs_io_error() which will correct the inode write pointer offset to reflect the current state of the one on the device. - Problem 2: Change zonefs_io_error_cb() into zonefs_handle_io_error() and call this function directly from __zonefs_io_error() after obtaining the zone information using blkdev_report_zones() with a simple callback function that copies to a local stack variable the struct blk_zone obtained from the device. This ensures that error handling is performed holding the inode truncate mutex. This change also simplifies error handling for conventional zone files by bypassing the execution of report zones entirely. This is safe to do because the condition of conventional zones cannot be read-only or offline and conventional zone files are always fully mapped with a constant file size. Reported-by: Shin'ichiro Kawasaki Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani Signed-off-by: Greg Kroah-Hartman --- fs/zonefs/file.c | 42 +++++++++++++++++++----------- fs/zonefs/super.c | 66 +++++++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 63cd50840419c..8d5f4a5a74e65 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -349,7 +349,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, struct zonefs_inode_info *zi = ZONEFS_I(inode); if (error) { - zonefs_io_error(inode, true); + /* + * For Sync IOs, error recovery is called from + * zonefs_file_dio_write(). + */ + if (!is_sync_kiocb(iocb)) + zonefs_io_error(inode, true); return error; } @@ -577,6 +582,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EINVAL; goto inode_unlock; } + /* + * Advance the zone write pointer offset. This assumes that the + * IO will succeed, which is OK to do because we do not allow + * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO + * fails, the error path will correct the write pointer offset. + */ + z->z_wpoffset += count; + zonefs_inode_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); append = sync; } @@ -596,20 +609,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EBUSY; } - if (zonefs_zone_is_seq(z) && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) - count = ret; - - /* - * Update the zone write pointer offset assuming the write - * operation succeeded. If it did not, the error recovery path - * will correct it. Also do active seq file accounting. - */ - mutex_lock(&zi->i_truncate_mutex); - z->z_wpoffset += count; - zonefs_inode_account_active(inode); - mutex_unlock(&zi->i_truncate_mutex); + /* + * For a failed IO or partial completion, trigger error recovery + * to update the zone write pointer offset to a correct value. + * For asynchronous IOs, zonefs_file_write_dio_end_io() may already + * have executed error recovery if the IO already completed when we + * reach here. However, we cannot know that and execute error recovery + * again (that will not change anything). + */ + if (zonefs_zone_is_seq(z)) { + if (ret > 0 && ret != count) + ret = -EIO; + if (ret < 0 && ret != -EIOCBQUEUED) + zonefs_io_error(inode, true); } inode_unlock: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 270ded209dde5..f6b701261078c 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -245,16 +245,18 @@ static void zonefs_inode_update_mode(struct inode *inode) z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; } -struct zonefs_ioerr_data { - struct inode *inode; - bool write; -}; - static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { - struct zonefs_ioerr_data *err = data; - struct inode *inode = err->inode; + struct blk_zone *z = data; + + *z = *zone; + return 0; +} + +static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, + bool write) +{ struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); @@ -269,8 +271,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && - !err->write && isize == data_size) - return 0; + !write && isize == data_size) + return; /* * At this point, we detected either a bad zone or an inconsistency @@ -291,7 +293,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ - if (zonefs_zone_is_seq(z) && isize != data_size) + if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); @@ -351,8 +353,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); - - return 0; } /* @@ -366,23 +366,25 @@ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = 1; - struct zonefs_ioerr_data err = { - .inode = inode, - .write = write, - }; + struct blk_zone zone; int ret; /* - * The only files that have more than one zone are conventional zone - * files with aggregated conventional zones, for which the inode zone - * size is always larger than the device zone size. + * Conventional zone have no write pointer and cannot become read-only + * or offline. So simply fake a report for a single or aggregated zone + * and let zonefs_handle_io_error() correct the zone inode information + * according to the mount options. */ - if (z->z_size > bdev_zone_sectors(sb->s_bdev)) - nr_zones = z->z_size >> - (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + if (!zonefs_zone_is_seq(z)) { + zone.start = z->z_sector; + zone.len = z->z_size >> SECTOR_SHIFT; + zone.wp = zone.start + zone.len; + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + zone.cond = BLK_ZONE_COND_NOT_WP; + zone.capacity = zone.len; + goto handle_io_error; + } /* * Memory allocations in blkdev_report_zones() can trigger a memory @@ -393,12 +395,20 @@ void __zonefs_io_error(struct inode *inode, bool write) * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, - zonefs_io_error_cb, &err); - if (ret != nr_zones) + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, + zonefs_io_error_cb, &zone); + memalloc_noio_restore(noio_flag); + + if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); - memalloc_noio_restore(noio_flag); + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + return; + } + +handle_io_error: + zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; -- GitLab From 00f9fcc0a109c01b62550abaf063f05635f649fb Mon Sep 17 00:00:00 2001 From: Fred Ai Date: Sat, 3 Feb 2024 02:29:08 -0800 Subject: [PATCH 0143/1418] mmc: sdhci-pci-o2micro: Fix a warm reboot issue that disk can't be detected by BIOS commit 58aeb5623c2ebdadefe6352b14f8076a7073fea0 upstream. Driver shall switch clock source from DLL clock to OPE clock when power off card to ensure that card can be identified with OPE clock by BIOS. Signed-off-by: Fred Ai Fixes:4be33cf18703 ("mmc: sdhci-pci-o2micro: Improve card input timing at SDR104/HS200 mode") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240203102908.4683-1-fredaibayhubtech@126.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index bca1d095b7597..24bb0e9809e76 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -602,6 +602,35 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_o2_enable_clk(host, clk); } +static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot = sdhci_priv(host); + u32 scratch_32 = 0; + u8 scratch_8 = 0; + + chip = slot->chip; + + if (mode == MMC_POWER_OFF) { + /* UnLock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 &= 0x7f; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + + /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */ + pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32); + scratch_32 &= ~(O2_SD_SEL_DLL); + pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32); + + /* Lock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 |= 0x80; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + } + + sdhci_set_power(host, mode, vdd); +} + static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) { struct sdhci_pci_chip *chip; @@ -911,6 +940,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_pci_o2_set_power, }; const struct sdhci_pci_fixes sdhci_o2 = { -- GitLab From fcf62f94ad80c3c3b602346ca74665d2ee2386a3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 5 Feb 2024 15:48:53 -0600 Subject: [PATCH 0144/1418] ASoC: amd: yc: Add DMI quirk for Lenovo Ideapad Pro 5 16ARP8 commit 610010737f74482a61896596a0116876ecf9e65c upstream. The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Stanislav Petrov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216925 Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240205214853.2689-1-mario.limonciello@amd.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 3b43595aa87a7..28da4e1858d7e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -241,6 +241,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From d627693e5a55f3e15369f28406da08447d118597 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:32 +0100 Subject: [PATCH 0145/1418] tools/rtla: Remove unused sched_getattr() function commit 084ce16df0f060efd371092a09a7ae74a536dc11 upstream. Clang is reporting: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c src/utils.c:241:19: warning: unused function 'sched_getattr' [-Wunused-function] 241 | static inline int sched_getattr(pid_t pid, struct sched_attr *attr, | ^~~~~~~~~~~~~ 1 warning generated. Which is correct, so remove the unused function. Link: https://lkml.kernel.org/r/eaed7ba122c4ae88ce71277c824ef41cbf789385.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: b1696371d865 ("rtla: Helper functions for rtla") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/utils.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 663a047f794d2..c9c4af0ad267d 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -243,12 +243,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, return syscall(__NR_sched_setattr, pid, attr, flags); } -static inline int sched_getattr(pid_t pid, struct sched_attr *attr, - unsigned int size, unsigned int flags) -{ - return syscall(__NR_sched_getattr, pid, attr, size, flags); -} - int __set_sched_attr(int pid, struct sched_attr *attr) { int flags = 0; -- GitLab From 771b74ce921269bbbcca1505a288ee1a174eb0ef Mon Sep 17 00:00:00 2001 From: limingming3 Date: Wed, 7 Feb 2024 14:51:42 +0800 Subject: [PATCH 0146/1418] tools/rtla: Replace setting prio with nice for SCHED_OTHER commit 14f08c976ffe0d2117c6199c32663df1cbc45c65 upstream. Since the sched_priority for SCHED_OTHER is always 0, it makes no sence to set it. Setting nice for SCHED_OTHER seems more meaningful. Link: https://lkml.kernel.org/r/20240207065142.1753909-1-limingming3@lixiang.com Cc: stable@vger.kernel.org Fixes: b1696371d865 ("rtla: Helper functions for rtla") Signed-off-by: limingming3 Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/utils.c | 6 +++--- tools/tracing/rtla/src/utils.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index c9c4af0ad267d..8c8d63c7196cf 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -478,13 +478,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param) if (prio == INVALID_VAL) return -1; - if (prio < sched_get_priority_min(SCHED_OTHER)) + if (prio < MIN_NICE) return -1; - if (prio > sched_get_priority_max(SCHED_OTHER)) + if (prio > MAX_NICE) return -1; sched_param->sched_policy = SCHED_OTHER; - sched_param->sched_priority = prio; + sched_param->sched_nice = prio; break; default: return -1; diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 5571afd3b5498..92da41aaf4c4c 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -7,6 +7,8 @@ */ #define BUFF_U64_STR_SIZE 24 #define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ -- GitLab From 5ccb527b66e6e021dc780561ed32c11adf767065 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Fri, 2 Feb 2024 19:16:07 -0500 Subject: [PATCH 0147/1418] tools/rtla: Exit with EXIT_SUCCESS when help is invoked commit b5f319360371087d52070d8f3fc7789e80ce69a6 upstream. Fix rtla so that the following commands exit with 0 when help is invoked rtla osnoise top -h rtla osnoise hist -h rtla timerlat top -h rtla timerlat hist -h Link: https://lore.kernel.org/linux-trace-devel/20240203001607.69703-1-jkacur@redhat.com Cc: stable@vger.kernel.org Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Signed-off-by: John Kacur Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/osnoise_hist.c | 6 +++++- tools/tracing/rtla/src/osnoise_top.c | 6 +++++- tools/tracing/rtla/src/timerlat_hist.c | 6 +++++- tools/tracing/rtla/src/timerlat_top.c | 6 +++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index fe34452fc4ec0..ce49c352b666e 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -472,7 +472,11 @@ static void osnoise_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 76479bfb29224..6c07f360de72c 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -282,7 +282,11 @@ void osnoise_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a83096..cdfe848113456 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -475,7 +475,11 @@ static void timerlat_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 3342719352222..8fc0f6aa19dad 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -305,7 +305,11 @@ static void timerlat_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* -- GitLab From 4ee28d5a4f57f3556c228b9adbaf9049700497c4 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:30 +0100 Subject: [PATCH 0148/1418] tools/rtla: Fix uninitialized bucket/data->bucket_size warning commit 64dc40f7523369912d7adb22c8cb655f71610505 upstream. When compiling rtla with clang, I am getting the following warnings: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [..] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/osnoise_hist.o src/osnoise_hist.c src/osnoise_hist.c:138:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 138 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~ src/osnoise_hist.c:149:6: note: uninitialized use occurs here 149 | if (bucket < entries) | ^~~~~~ src/osnoise_hist.c:138:2: note: remove the 'if' if its condition is always true 138 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~~~~~~ 139 | bucket = duration / data->bucket_size; src/osnoise_hist.c:132:12: note: initialize the variable 'bucket' to silence this warning 132 | int bucket; | ^ | = 0 1 warning generated. [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/timerlat_hist.o src/timerlat_hist.c src/timerlat_hist.c:181:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 181 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~ src/timerlat_hist.c:204:6: note: uninitialized use occurs here 204 | if (bucket < entries) | ^~~~~~ src/timerlat_hist.c:181:2: note: remove the 'if' if its condition is always true 181 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~~~~~~ 182 | bucket = latency / data->bucket_size; src/timerlat_hist.c:175:12: note: initialize the variable 'bucket' to silence this warning 175 | int bucket; | ^ | = 0 1 warning generated. This is a legit warning, but data->bucket_size is always > 0 (see timerlat_hist_parse_args()), so the if is not necessary. Remove the unneeded if (data->bucket_size) to avoid the warning. Link: https://lkml.kernel.org/r/6e1b1665cd99042ae705b3e0fc410858c4c42346.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Fixes: 829a6c0b5698 ("rtla/osnoise: Add the hist mode") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/osnoise_hist.c | 3 +-- tools/tracing/rtla/src/timerlat_hist.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index ce49c352b666e..b9658f213cb55 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -129,8 +129,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, if (params->output_divisor) duration = duration / params->output_divisor; - if (data->bucket_size) - bucket = duration / data->bucket_size; + bucket = duration / data->bucket_size; total_duration = duration * count; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index cdfe848113456..ed08295bfa12c 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -151,8 +151,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (params->output_divisor) latency = latency / params->output_divisor; - if (data->bucket_size) - bucket = latency / data->bucket_size; + bucket = latency / data->bucket_size; if (!thread) { hist = data->hist[cpu].irq; -- GitLab From 3ff3e6a9363ab577ec720f6234c0661ea86be459 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:29 +0100 Subject: [PATCH 0149/1418] tools/rtla: Fix Makefile compiler options for clang commit bc4cbc9d260ba8358ca63662919f4bb223cb603b upstream. The following errors are showing up when compiling rtla with clang: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc1\" -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c clang: warning: optimization flag '-ffat-lto-objects' is not supported [-Wignored-optimization-argument] warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] 1 warning generated. clang -o rtla -ggdb src/osnoise.o src/osnoise_hist.o src/osnoise_top.o src/rtla.o src/timerlat_aa.o src/timerlat.o src/timerlat_hist.o src/timerlat_top.o src/timerlat_u.o src/trace.o src/utils.o $(pkg-config --libs libtracefs) src/osnoise.o: file not recognized: file format not recognized clang: error: linker command failed with exit code 1 (use -v to see invocation) make: *** [Makefile:110: rtla] Error 1 Solve these issues by: - removing -ffat-lto-objects and -Wno-maybe-uninitialized if using clang - informing the linker about -flto=auto Link: https://lore.kernel.org/linux-trace-kernel/567ac1b94effc228ce9a0225b9df7232a9b35b55.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Fixes: 1a7b22ab15eb ("tools/rtla: Build with EXTRA_{C,LD}FLAGS") Suggested-by: Donald Zickus Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 22e28b76f8004..6912e9577b658 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized +ifeq ($(CC),clang) + FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) -- GitLab From 95de4ad173ca0e61034f3145d66917970961c210 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 6 Feb 2024 11:22:09 +0100 Subject: [PATCH 0150/1418] fs: relax mount_setattr() permission checks commit 46f5ab762d048dad224436978315cbc2fa79c630 upstream. When we added mount_setattr() I added additional checks compared to the legacy do_reconfigure_mnt() and do_change_type() helpers used by regular mount(2). If that mount had a parent then verify that the caller and the mount namespace the mount is attached to match and if not make sure that it's an anonymous mount. The real rootfs falls into neither category. It is neither an anoymous mount because it is obviously attached to the initial mount namespace but it also obviously doesn't have a parent mount. So that means legacy mount(2) allows changing mount properties on the real rootfs but mount_setattr(2) blocks this. I never thought much about this but of course someone on this planet of earth changes properties on the real rootfs as can be seen in [1]. Since util-linux finally switched to the new mount api in 2.39 not so long ago it also relies on mount_setattr() and that surfaced this issue when Fedora 39 finally switched to it. Fix this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2256843 Link: https://lore.kernel.org/r/20240206-vfs-mount-rootfs-v1-1-19b335eee133@kernel.org Reviewed-by: Jan Kara Reported-by: Karel Zak Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 29a8d90dd1072..1533550f73567 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4172,10 +4172,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) /* * If this is an attached mount make sure it's located in the callers * mount namespace. If it's not don't let the caller interact with it. - * If this is a detached mount make sure it has an anonymous mount - * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. + * + * If this mount doesn't have a parent it's most often simply a + * detached mount with an anonymous mount namespace. IOW, something + * that's simply not attached yet. But there are apparently also users + * that do change mount properties on the rootfs itself. That obviously + * neither has a parent nor is it a detached mount so we cannot + * unconditionally check for detached mounts. */ - if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) + if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) goto out; /* -- GitLab From 058fbaf7716b8b16c92566d5e373fb8f28ba0e89 Mon Sep 17 00:00:00 2001 From: Sinthu Raja Date: Tue, 6 Feb 2024 06:29:28 +0530 Subject: [PATCH 0151/1418] net: ethernet: ti: cpsw: enable mac_managed_pm to fix mdio commit bc4ce46b1e3d1da4309405cd4afc7c0fcddd0b90 upstream. The below commit introduced a WARN when phy state is not in the states: PHY_HALTED, PHY_READY and PHY_UP. commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") When cpsw resumes, there have port in PHY_NOLINK state, so the below warning comes out. Set mac_managed_pm be true to tell mdio that the phy resume/suspend is managed by the mac, to fix the following warning: WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 Hardware name: Generic AM33XX (Flattened Device Tree) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x24/0x2c dump_stack_lvl from __warn+0x84/0x15c __warn from warn_slowpath_fmt+0x1a8/0x1c8 warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 dpm_run_callback from device_resume+0xb8/0x2b8 device_resume from dpm_resume+0x144/0x314 dpm_resume from dpm_resume_end+0x14/0x20 dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 suspend_devices_and_enter from pm_suspend+0x2e0/0x33c pm_suspend from state_store+0x74/0xd0 state_store from kernfs_fop_write_iter+0x104/0x1ec kernfs_fop_write_iter from vfs_write+0x1b8/0x358 vfs_write from ksys_write+0x78/0xf8 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe094dfa8 to 0xe094dff0) dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 Cc: # v6.0+ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Signed-off-by: Sinthu Raja Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 13c9c2d6b79bb..d95771ca4e5a3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } } + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); -- GitLab From 5140c4d5f4fd978b0ad8a2471df1ceb03324922a Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 6 Feb 2024 09:58:49 +0100 Subject: [PATCH 0152/1418] s390/qeth: Fix potential loss of L3-IP@ in case of network issues commit 2fe8a236436fe40d8d26a1af8d150fc80f04ee1a upstream. Symptom: In case of a bad cable connection (e.g. dirty optics) a fast sequence of network DOWN-UP-DOWN-UP could happen. UP triggers recovery of the qeth interface. In case of a second DOWN while recovery is still ongoing, it can happen that the IP@ of a Layer3 qeth interface is lost and will not be recovered by the second UP. Problem: When registration of IP addresses with Layer 3 qeth devices fails, (e.g. because of bad address format) the respective IP address is deleted from its hash-table in the driver. If registration fails because of a ENETDOWN condition, the address should stay in the hashtable, so a subsequent recovery can restore it. 3caa4af834df ("qeth: keep ip-address after LAN_OFFLINE failure") fixes this for registration failures during normal operation, but not during recovery. Solution: Keep L3-IP address in case of ENETDOWN in qeth_l3_recover_ip(). For consistency with qeth_l3_add_ip() we also keep it in case of EADDRINUSE, i.e. for some reason the card already/still has this address registered. Fixes: 4a71df50047f ("qeth: new qeth device driver") Cc: stable@vger.kernel.org Signed-off-by: Alexandra Winter Link: https://lore.kernel.org/r/20240206085849.2902775-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c0f30cefec102..a416011391856 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -254,9 +254,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) if (!recover) { hash_del(&addr->hnode); kfree(addr); - continue; + } else { + /* prepare for recovery */ + addr->disp_flag = QETH_DISP_ADDR_ADD; } - addr->disp_flag = QETH_DISP_ADDR_ADD; } mutex_unlock(&card->ip_lock); @@ -277,9 +278,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (addr->disp_flag == QETH_DISP_ADDR_ADD) { rc = qeth_l3_register_addr_entry(card, addr); - if (!rc) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { + /* keep it in the records */ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; } else { + /* bad address */ hash_del(&addr->hnode); kfree(addr); } -- GitLab From 4888754f3dd04154ef85a5535be935161f219315 Mon Sep 17 00:00:00 2001 From: Sinthu Raja Date: Tue, 6 Feb 2024 06:29:27 +0530 Subject: [PATCH 0153/1418] net: ethernet: ti: cpsw_new: enable mac_managed_pm to fix mdio commit 9def04e759caa5a3d741891037ae99f81e2fff01 upstream. The below commit introduced a WARN when phy state is not in the states: PHY_HALTED, PHY_READY and PHY_UP. commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") When cpsw_new resumes, there have port in PHY_NOLINK state, so the below warning comes out. Set mac_managed_pm be true to tell mdio that the phy resume/suspend is managed by the mac, to fix the following warning: WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 Hardware name: Generic AM33XX (Flattened Device Tree) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x24/0x2c dump_stack_lvl from __warn+0x84/0x15c __warn from warn_slowpath_fmt+0x1a8/0x1c8 warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 dpm_run_callback from device_resume+0xb8/0x2b8 device_resume from dpm_resume+0x144/0x314 dpm_resume from dpm_resume_end+0x14/0x20 dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 suspend_devices_and_enter from pm_suspend+0x2e0/0x33c pm_suspend from state_store+0x74/0xd0 state_store from kernfs_fop_write_iter+0x104/0x1ec kernfs_fop_write_iter from vfs_write+0x1b8/0x358 vfs_write from ksys_write+0x78/0xf8 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe094dfa8 to 0xe094dff0) dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 Cc: # v6.0+ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Signed-off-by: Sinthu Raja Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw_new.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 83596ec0c7cb9..6e70aa1cc7bf1 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -772,6 +772,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->slave_num); return; } + + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); -- GitLab From 309ef7de5d840e17607e7d65cbf297c0564433ef Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Thu, 1 Feb 2024 20:40:38 -0800 Subject: [PATCH 0154/1418] hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 upstream. If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER handler cannot perform VF register successfully as the register call is received before netvsc_probe is finished. This is because we register register_netdevice_notifier() very early( even before vmbus_driver_register()). To fix this, we try to register each such matching VF( if it is visible as a netdevice) at the end of netvsc_probe. Cc: stable@vger.kernel.org Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register") Suggested-by: Dexuan Cui Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Dexuan Cui Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c1aac6ceb29e6..1b74055399840 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,6 +42,10 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); @@ -2181,7 +2185,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) } static int netvsc_vf_join(struct net_device *vf_netdev, - struct net_device *ndev) + struct net_device *ndev, int context) { struct net_device_context *ndev_ctx = netdev_priv(ndev); int ret; @@ -2204,7 +2208,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + /* If this registration is called from probe context vf_takeover + * is taken care of later in probe itself. + */ + if (context == VF_REG_IN_NOTIFIER) + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2342,7 +2350,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) return NOTIFY_DONE; } -static int netvsc_register_vf(struct net_device *vf_netdev) +static int netvsc_register_vf(struct net_device *vf_netdev, int context) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; @@ -2382,7 +2390,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - if (netvsc_vf_join(vf_netdev, ndev) != 0) + if (netvsc_vf_join(vf_netdev, ndev, context) != 0) return NOTIFY_DONE; dev_hold(vf_netdev); @@ -2480,10 +2488,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static int check_dev_is_matching_vf(struct net_device *event_ndev) +{ + /* Skip NetVSC interfaces */ + if (event_ndev->netdev_ops == &device_ops) + return -ENODEV; + + /* Avoid non-Ethernet type devices */ + if (event_ndev->type != ARPHRD_ETHER) + return -ENODEV; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_ndev)) + return -ENODEV; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (netif_is_bond_master(event_ndev)) + return -ENODEV; + + return 0; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - struct net_device *net = NULL; + struct net_device *net = NULL, *vf_netdev; struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; @@ -2592,6 +2621,30 @@ static int netvsc_probe(struct hv_device *dev, } list_add(&net_device_ctx->list, &netvsc_dev_list); + + /* When the hv_netvsc driver is unloaded and reloaded, the + * NET_DEVICE_REGISTER for the vf device is replayed before probe + * is complete. This is because register_netdevice_notifier() gets + * registered before vmbus_driver_register() so that callback func + * is set before probe and we don't miss events like NETDEV_POST_INIT + * So, in this section we try to register the matching vf device that + * is present as a netdevice, knowing that its register call is not + * processed in the netvsc_netdev_notifier(as probing is progress and + * get_netvsc_byslot fails). + */ + for_each_netdev(dev_net(net), vf_netdev) { + ret = check_dev_is_matching_vf(vf_netdev); + if (ret != 0) + continue; + + if (net != get_netvsc_byslot(vf_netdev)) + continue; + + netvsc_prepare_bonding(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + __netvsc_vf_setup(net, vf_netdev); + break; + } rtnl_unlock(); netvsc_devinfo_put(device_info); @@ -2748,28 +2801,17 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + int ret = 0; - /* Skip our own events */ - if (event_dev->netdev_ops == &device_ops) - return NOTIFY_DONE; - - /* Avoid non-Ethernet type devices */ - if (event_dev->type != ARPHRD_ETHER) - return NOTIFY_DONE; - - /* Avoid Vlan dev with same MAC registering as VF */ - if (is_vlan_dev(event_dev)) - return NOTIFY_DONE; - - /* Avoid Bonding master dev with same MAC registering as VF */ - if (netif_is_bond_master(event_dev)) + ret = check_dev_is_matching_vf(event_dev); + if (ret != 0) return NOTIFY_DONE; switch (event) { case NETDEV_POST_INIT: return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: - return netvsc_register_vf(event_dev); + return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: -- GitLab From f3f98d7d84b31828004545e29fd7262b9f444139 Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Thu, 1 Feb 2024 17:07:16 +0530 Subject: [PATCH 0155/1418] ceph: prevent use-after-free in encode_cap_msg() commit cda4672da1c26835dcbd7aec2bfed954eda9b5ef upstream. In fs/ceph/caps.c, in encode_cap_msg(), "use after free" error was caught by KASAN at this line - 'ceph_buffer_get(arg->xattr_buf);'. This implies before the refcount could be increment here, it was freed. In same file, in "handle_cap_grant()" refcount is decremented by this line - 'ceph_buffer_put(ci->i_xattrs.blob);'. It appears that a race occurred and resource was freed by the latter line before the former line could increment it. encode_cap_msg() is called by __send_cap() and __send_cap() is called by ceph_check_caps() after calling __prep_cap(). __prep_cap() is where arg->xattr_buf is assigned to ci->i_xattrs.blob. This is the spot where the refcount must be increased to prevent "use after free" error. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/59259 Signed-off-by: Rishabh Dave Reviewed-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 111938a6307e6..57603782e7e2a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1391,7 +1391,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, if (flushing & CEPH_CAP_XATTR_EXCL) { arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); arg->xattr_version = ci->i_xattrs.version; - arg->xattr_buf = ci->i_xattrs.blob; + arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); } else { arg->xattr_buf = NULL; arg->old_xattr_buf = NULL; @@ -1457,6 +1457,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) encode_cap_msg(msg, arg); ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); + ceph_buffer_put(arg->xattr_buf); if (arg->wake) wake_up_all(&ci->i_cap_wq); } -- GitLab From 2e2c07104b4904aed1389a59b25799b95a85b5b9 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 30 Jan 2024 22:04:18 +0100 Subject: [PATCH 0156/1418] fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super commit 79d72c68c58784a3e1cd2378669d51bfd0cb7498 upstream. When configuring a hugetlb filesystem via the fsconfig() syscall, there is a possible NULL dereference in hugetlbfs_fill_super() caused by assigning NULL to ctx->hstate in hugetlbfs_parse_param() when the requested pagesize is non valid. E.g: Taking the following steps: fd = fsopen("hugetlbfs", FSOPEN_CLOEXEC); fsconfig(fd, FSCONFIG_SET_STRING, "pagesize", "1024", 0); fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); Given that the requested "pagesize" is invalid, ctxt->hstate will be replaced with NULL, losing its previous value, and we will print an error: ... ... case Opt_pagesize: ps = memparse(param->string, &rest); ctx->hstate = h; if (!ctx->hstate) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } return 0; ... ... This is a problem because later on, we will dereference ctxt->hstate in hugetlbfs_fill_super() ... ... sb->s_blocksize = huge_page_size(ctx->hstate); ... ... Causing below Oops. Fix this by replacing cxt->hstate value only when then pagesize is known to be valid. kernel: hugetlbfs: Unsupported page size 0 MB kernel: BUG: kernel NULL pointer dereference, address: 0000000000000028 kernel: #PF: supervisor read access in kernel mode kernel: #PF: error_code(0x0000) - not-present page kernel: PGD 800000010f66c067 P4D 800000010f66c067 PUD 1b22f8067 PMD 0 kernel: Oops: 0000 [#1] PREEMPT SMP PTI kernel: CPU: 4 PID: 5659 Comm: syscall Tainted: G E 6.8.0-rc2-default+ #22 5a47c3fef76212addcc6eb71344aabc35190ae8f kernel: Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 kernel: Call Trace: kernel: kernel: ? __die_body+0x1a/0x60 kernel: ? page_fault_oops+0x16f/0x4a0 kernel: ? search_bpf_extables+0x65/0x70 kernel: ? fixup_exception+0x22/0x310 kernel: ? exc_page_fault+0x69/0x150 kernel: ? asm_exc_page_fault+0x22/0x30 kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 kernel: ? hugetlbfs_fill_super+0xb4/0x1a0 kernel: ? hugetlbfs_fill_super+0x28/0x1a0 kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 kernel: vfs_get_super+0x40/0xa0 kernel: ? __pfx_bpf_lsm_capable+0x10/0x10 kernel: vfs_get_tree+0x25/0xd0 kernel: vfs_cmd_create+0x64/0xe0 kernel: __x64_sys_fsconfig+0x395/0x410 kernel: do_syscall_64+0x80/0x160 kernel: ? syscall_exit_to_user_mode+0x82/0x240 kernel: ? do_syscall_64+0x8d/0x160 kernel: ? syscall_exit_to_user_mode+0x82/0x240 kernel: ? do_syscall_64+0x8d/0x160 kernel: ? exc_page_fault+0x69/0x150 kernel: entry_SYSCALL_64_after_hwframe+0x6e/0x76 kernel: RIP: 0033:0x7ffbc0cb87c9 kernel: Code: 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 96 0d 00 f7 d8 64 89 01 48 kernel: RSP: 002b:00007ffc29d2f388 EFLAGS: 00000206 ORIG_RAX: 00000000000001af kernel: RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffbc0cb87c9 kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 kernel: RBP: 00007ffc29d2f3b0 R08: 0000000000000000 R09: 0000000000000000 kernel: R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 kernel: R13: 00007ffc29d2f4c0 R14: 0000000000000000 R15: 0000000000000000 kernel: kernel: Modules linked in: rpcsec_gss_krb5(E) auth_rpcgss(E) nfsv4(E) dns_resolver(E) nfs(E) lockd(E) grace(E) sunrpc(E) netfs(E) af_packet(E) bridge(E) stp(E) llc(E) iscsi_ibft(E) iscsi_boot_sysfs(E) intel_rapl_msr(E) intel_rapl_common(E) iTCO_wdt(E) intel_pmc_bxt(E) sb_edac(E) iTCO_vendor_support(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) rfkill(E) ipmi_ssif(E) kvm(E) acpi_ipmi(E) irqbypass(E) pcspkr(E) igb(E) ipmi_si(E) mei_me(E) i2c_i801(E) joydev(E) intel_pch_thermal(E) i2c_smbus(E) dca(E) lpc_ich(E) mei(E) ipmi_devintf(E) ipmi_msghandler(E) acpi_pad(E) tiny_power_button(E) button(E) fuse(E) efi_pstore(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) sd_mod(E) t10_pi(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) polyval_clmulni(E) ahci(E) xhci_pci(E) polyval_generic(E) gf128mul(E) ghash_clmulni_intel(E) sha512_ssse3(E) sha256_ssse3(E) xhci_pci_renesas(E) libahci(E) ehci_pci(E) sha1_ssse3(E) xhci_hcd(E) ehci_hcd(E) libata(E) kernel: mgag200(E) i2c_algo_bit(E) usbcore(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) aesni_intel(E) crypto_simd(E) cryptd(E) kernel: Unloaded tainted modules: acpi_cpufreq(E):1 fjes(E):1 kernel: CR2: 0000000000000028 kernel: ---[ end trace 0000000000000000 ]--- kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 Link: https://lkml.kernel.org/r/20240130210418.3771-1-osalvador@suse.de Fixes: 32021982a324 ("hugetlbfs: Convert to fs_context") Signed-off-by: Michal Hocko Signed-off-by: Oscar Salvador Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8eea709e36599..ab0d30af7d9c3 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1350,6 +1350,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par { struct hugetlbfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; + struct hstate *h; char *rest; unsigned long ps; int opt; @@ -1394,11 +1395,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_pagesize: ps = memparse(param->string, &rest); - ctx->hstate = size_to_hstate(ps); - if (!ctx->hstate) { + h = size_to_hstate(ps); + if (!h) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } + ctx->hstate = h; return 0; case Opt_min_size: -- GitLab From 79081197b4e235fa821302f268400e1c24486022 Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Tue, 23 Jan 2024 12:04:42 -0800 Subject: [PATCH 0157/1418] mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE commit e656c7a9e59607d1672d85ffa9a89031876ffe67 upstream. For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages are not reserved. However when the shared memory is attached with the shmat() call the hugetlb pages are getting reserved incorrectly for SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug. ------------------------------- Following test shows the issue. $cat shmhtb.c int main() { int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE; int shmid; shmid = shmget(SKEY, SHMSZ, shmflags); if (shmid < 0) { printf("shmat: shmget() failed, %d\n", errno); return 1; } printf("After shmget()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmat(shmid, NULL, 0); printf("\nAfter shmat()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmctl(shmid, IPC_RMID, NULL); return 0; } #sysctl -w vm.nr_hugepages=20 #./shmhtb After shmget() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 0 HugePages_Surp: 0 After shmat() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 5 <-- HugePages_Surp: 0 -------------------------------- Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared memory in the shmat() call. Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ab0d30af7d9c3..4fe4b3393e71c 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -123,6 +123,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -164,10 +165,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; -- GitLab From 08c1948823765a9a11e7782380ad1b6b55fcb595 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 23 Jan 2024 16:14:22 +0100 Subject: [PATCH 0158/1418] of: property: fix typo in io-channels commit 8f7e917907385e112a845d668ae2832f41e64bf5 upstream. The property is io-channels and not io-channel. This was effectively preventing the devlink creation. Fixes: 8e12257dead7 ("of: property: Add device link support for iommus, mboxes and io-channels") Cc: stable@vger.kernel.org Signed-off-by: Nuno Sa Reviewed-by: Saravana Kannan Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240123-iio-backend-v7-1-1bff236b8693@analog.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index e1946cc170309..550efd1a58fc6 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1243,7 +1243,7 @@ DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") -DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells") +DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") -- GitLab From 8a72a4689a8d0d228932985575c76ff5176e1086 Mon Sep 17 00:00:00 2001 From: Maxime Jayat Date: Mon, 6 Nov 2023 19:01:58 +0100 Subject: [PATCH 0159/1418] can: netlink: Fix TDCO calculation using the old data bittiming commit 2aa0a5e65eae27dbd96faca92c84ecbf6f492d42 upstream. The TDCO calculation was done using the currently applied data bittiming, instead of the newly computed data bittiming, which means that the TDCO had an invalid value unless setting the same data bittiming twice. Fixes: d99755f71a80 ("can: netlink: add interface for CAN-FD Transmitter Delay Compensation (TDC)") Signed-off-by: Maxime Jayat Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/all/40579c18-63c0-43a4-8d4c-f3a6c1c0b417@munic.io Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 8efa22d9f214d..053d375eae4f5 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -311,7 +311,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Neither of TDC parameters nor TDC flags are * provided: do calculation */ - can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming, + can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, &priv->ctrlmode, priv->ctrlmode_supported); } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly * turned off. TDC is disabled: do nothing -- GitLab From aedda066d717a0b4335d7e0a00b2e3a61e40afcf Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 21 Jul 2023 09:22:26 -0700 Subject: [PATCH 0160/1418] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock commit 6cdedc18ba7b9dacc36466e27e3267d201948c8d upstream. The following 3 locks would race against each other, causing the deadlock situation in the Syzbot bug report: - j1939_socks_lock - active_session_list_lock - sk_session_queue_lock A reasonable fix is to change j1939_socks_lock to an rwlock, since in the rare situations where a write lock is required for the linked list that j1939_socks_lock is protecting, the code does not attempt to acquire any more locks. This would break the circular lock dependency, where, for example, the current thread already locks j1939_socks_lock and attempts to acquire sk_session_queue_lock, and at the same time, another thread attempts to acquire j1939_socks_lock while holding sk_session_queue_lock. NOTE: This patch along does not fix the unregister_netdevice bug reported by Syzbot; instead, it solves a deadlock situation to prepare for one or more further patches to actually fix the Syzbot bug, which appears to be a reference counting problem within the j1939 codebase. Reported-by: Signed-off-by: Ziqi Zhao Reviewed-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20230721162226.8639-1-astrajoan@yahoo.com [mkl: remove unrelated newline change] Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/j1939-priv.h | 2 +- net/can/j1939/main.c | 2 +- net/can/j1939/socket.c | 24 ++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 16af1a7f80f60..74f15592d1708 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -86,7 +86,7 @@ struct j1939_priv { unsigned int tp_max_packet_size; /* lock for j1939_socks list */ - spinlock_t j1939_socks_lock; + rwlock_t j1939_socks_lock; struct list_head j1939_socks; struct kref rx_kref; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ecff1c947d683..a6fb89fa62785 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return ERR_PTR(-ENOMEM); j1939_tp_init(priv); - spin_lock_init(&priv->j1939_socks_lock); + rwlock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); mutex_lock(&j1939_netdev_lock); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b0be23559243c..a0bf2575febb7 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); } static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) { - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_del_init(&jsk->list); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; @@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) struct j1939_sock *jsk; bool match = false; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { match = j1939_sk_recv_match_one(jsk, skcb); if (match) break; } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); return match; } @@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sock *jsk; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { j1939_sk_recv_one(jsk, skb); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static void j1939_sk_sock_destruct(struct sock *sk) @@ -1080,12 +1080,12 @@ void j1939_sk_errqueue(struct j1939_session *session, } /* spread RX notifications to all sockets subscribed to this session */ - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) @@ -1273,7 +1273,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) struct j1939_sock *jsk; int error_code = ENETDOWN; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) @@ -1281,7 +1281,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) j1939_sk_queue_drop_all(priv, jsk, error_code); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, -- GitLab From 4dd684d4bb3cd5454e0bf6e2a1bdfbd5c9c872ed Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 20 Oct 2023 15:38:14 +0200 Subject: [PATCH 0161/1418] can: j1939: Fix UAF in j1939_sk_match_filter during setsockopt(SO_J1939_FILTER) commit efe7cf828039aedb297c1f9920b638fffee6aabc upstream. Lock jsk->sk to prevent UAF when setsockopt(..., SO_J1939_FILTER, ...) modifies jsk->filters while receiving packets. Following trace was seen on affected system: ================================================================== BUG: KASAN: slab-use-after-free in j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] Read of size 4 at addr ffff888012144014 by task j1939/350 CPU: 0 PID: 350 Comm: j1939 Tainted: G W OE 6.5.0-rc5 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: print_report+0xd3/0x620 ? kasan_complete_mode_report_info+0x7d/0x200 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] kasan_report+0xc2/0x100 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] __asan_load4+0x84/0xb0 j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] j1939_sk_recv+0x20b/0x320 [can_j1939] ? __kasan_check_write+0x18/0x20 ? __pfx_j1939_sk_recv+0x10/0x10 [can_j1939] ? j1939_simple_recv+0x69/0x280 [can_j1939] ? j1939_ac_recv+0x5e/0x310 [can_j1939] j1939_can_recv+0x43f/0x580 [can_j1939] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] ? raw_rcv+0x42/0x3c0 [can_raw] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] can_rcv_filter+0x11f/0x350 [can] can_receive+0x12f/0x190 [can] ? __pfx_can_rcv+0x10/0x10 [can] can_rcv+0xdd/0x130 [can] ? __pfx_can_rcv+0x10/0x10 [can] __netif_receive_skb_one_core+0x13d/0x150 ? __pfx___netif_receive_skb_one_core+0x10/0x10 ? __kasan_check_write+0x18/0x20 ? _raw_spin_lock_irq+0x8c/0xe0 __netif_receive_skb+0x23/0xb0 process_backlog+0x107/0x260 __napi_poll+0x69/0x310 net_rx_action+0x2a1/0x580 ? __pfx_net_rx_action+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? handle_irq_event+0x7d/0xa0 __do_softirq+0xf3/0x3f8 do_softirq+0x53/0x80 __local_bh_enable_ip+0x6e/0x70 netif_rx+0x16b/0x180 can_send+0x32b/0x520 [can] ? __pfx_can_send+0x10/0x10 [can] ? __check_object_size+0x299/0x410 raw_sendmsg+0x572/0x6d0 [can_raw] ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] ? apparmor_socket_sendmsg+0x2f/0x40 ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] sock_sendmsg+0xef/0x100 sock_write_iter+0x162/0x220 ? __pfx_sock_write_iter+0x10/0x10 ? __rtnl_unlock+0x47/0x80 ? security_file_permission+0x54/0x320 vfs_write+0x6ba/0x750 ? __pfx_vfs_write+0x10/0x10 ? __fget_light+0x1ca/0x1f0 ? __rcu_read_unlock+0x5b/0x280 ksys_write+0x143/0x170 ? __pfx_ksys_write+0x10/0x10 ? __kasan_check_read+0x15/0x20 ? fpregs_assert_state_consistent+0x62/0x70 __x64_sys_write+0x47/0x60 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6d/0x90 ? irqentry_exit+0x3f/0x50 ? exc_page_fault+0x79/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 348: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_alloc_info+0x1f/0x30 __kasan_kmalloc+0xb5/0xc0 __kmalloc_node_track_caller+0x67/0x160 j1939_sk_setsockopt+0x284/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 349: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_free_info+0x2f/0x50 __kasan_slab_free+0x12e/0x1c0 __kmem_cache_free+0x1b9/0x380 kfree+0x7a/0x120 j1939_sk_setsockopt+0x3b2/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") Reported-by: Sili Luo Suggested-by: Sili Luo Acked-by: Oleksij Rempel Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/all/20231020133814.383996-1-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/j1939-priv.h | 1 + net/can/j1939/socket.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 74f15592d1708..31a93cae5111b 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -301,6 +301,7 @@ struct j1939_sock { int ifindex; struct j1939_addr addr; + spinlock_t filters_lock; struct j1939_filter *filters; int nfilters; pgn_t pgn_rx_filter; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index a0bf2575febb7..58909b36561a6 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + spin_lock_bh(&jsk->filters_lock); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + spin_unlock_bh(&jsk->filters_lock); return false; + +filter_match_found: + spin_unlock_bh(&jsk->filters_lock); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, @@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + spin_lock_init(&jsk->filters_lock); /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ sock_set_flag(sk, SOCK_RCU_FREE); @@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, } lock_sock(&jsk->sk); + spin_lock_bh(&jsk->filters_lock); ofilters = jsk->filters; jsk->filters = filters; jsk->nfilters = count; + spin_unlock_bh(&jsk->filters_lock); release_sock(&jsk->sk); kfree(ofilters); return 0; -- GitLab From 9359ff1a4501ea7286aec70b90f88fac66a6aea5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 27 Dec 2023 16:21:24 +0100 Subject: [PATCH 0162/1418] pmdomain: core: Move the unused cleanup to a _sync initcall commit 741ba0134fa7822fcf4e4a0a537a5c4cfd706b20 upstream. The unused clock cleanup uses the _sync initcall to give all users at earlier initcalls time to probe. Do the same to avoid leaving some PDs dangling at "on" (which actually happened on qcom!). Fixes: 2fe71dcdfd10 ("PM / domains: Add late_initcall to disable unused PM domains") Signed-off-by: Konrad Dybcio Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231227-topic-pmdomain_sync_cleanup-v1-1-5f36769d538b@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 56ceba4698024..d238b47f74c34 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1052,7 +1052,7 @@ static int __init genpd_power_off_unused(void) return 0; } -late_initcall(genpd_power_off_unused); +late_initcall_sync(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP -- GitLab From c7f9c3e94e6113021870c247826ec41d4d652ce7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:33:55 +0100 Subject: [PATCH 0163/1418] fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand() commit 60f92acb60a989b14e4b744501a0df0f82ef30a3 upstream. Patch series "fs/proc: do_task_stat: use sig->stats_". do_task_stat() has the same problem as getrusage() had before "getrusage: use sig->stats_lock rather than lock_task_sighand()": a hard lockup. If NR_CPUS threads call lock_task_sighand() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. This patch (of 3): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. Not only this removes for_each_thread() from the critical section with irqs disabled, this removes another case when stats_lock is taken with siglock held. We want to remove this dependency, then we can change the users of stats_lock to not disable irqs. Link: https://lkml.kernel.org/r/20240123153313.GA21832@redhat.com Link: https://lkml.kernel.org/r/20240123153355.GA21854@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b8103c7e..1b0d78dfd20f9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -501,7 +501,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = utime = stime = 0; + cutime = cstime = 0; cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { @@ -535,7 +535,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime_adjusted(task, &utime, &stime); gtime += sig->gtime; if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) @@ -551,10 +550,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - if (!whole) { + + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { + task_cputime_adjusted(task, &utime, &stime); min_flt = task->min_flt; maj_flt = task->maj_flt; - task_cputime_adjusted(task, &utime, &stime); gtime = task_gtime(task); } -- GitLab From 1e4432d463f38d98d40b6f0db2a1d93f2615bb3a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 14 Feb 2024 11:20:46 -0500 Subject: [PATCH 0164/1418] tracing: Inform kmemleak of saved_cmdlines allocation commit 2394ac4145ea91b92271e675a09af2a9ea6840b7 upstream. The allocation of the struct saved_cmdlines_buffer structure changed from: s = kmalloc(sizeof(*s), GFP_KERNEL); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); to: orig_size = sizeof(*s) + val * TASK_COMM_LEN; order = get_order(orig_size); size = 1 << (order + PAGE_SHIFT); page = alloc_pages(GFP_KERNEL, order); if (!page) return NULL; s = page_address(page); memset(s, 0, sizeof(*s)); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); Where that s->saved_cmdlines allocation looks to be a dangling allocation to kmemleak. That's because kmemleak only keeps track of kmalloc() allocations. For allocations that use page_alloc() directly, the kmemleak needs to be explicitly informed about it. Add kmemleak_alloc() and kmemleak_free() around the page allocation so that it doesn't give the following false positive: unreferenced object 0xffff8881010c8000 (size 32760): comm "swapper", pid 0, jiffies 4294667296 hex dump (first 32 bytes): ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ backtrace (crc ae6ec1b9): [] kmemleak_alloc+0x45/0x80 [] __kmalloc_large_node+0x10d/0x190 [] __kmalloc+0x3b1/0x4c0 [] allocate_cmdlines_buffer+0x113/0x230 [] tracer_alloc_buffers.isra.0+0x124/0x460 [] early_trace_init+0x14/0xa0 [] start_kernel+0x12e/0x3c0 [] x86_64_start_reservations+0x18/0x30 [] x86_64_start_kernel+0x7b/0x80 [] secondary_startup_64_no_verify+0x15e/0x16b Link: https://lore.kernel.org/linux-trace-kernel/87r0hfnr9r.fsf@kernel.org/ Link: https://lore.kernel.org/linux-trace-kernel/20240214112046.09a322d6@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Catalin Marinas Fixes: 44dc5c41b5b1 ("tracing: Fix wasted memory in saved_cmdlines logic") Reported-by: Kalle Valo Tested-by: Kalle Valo Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fa6c193e22f02..f667d6bdddda5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2268,6 +2269,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); kfree(s->map_cmdline_to_pid); + kmemleak_free(s); free_pages((unsigned long)s, order); } @@ -2287,6 +2289,7 @@ static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) return NULL; s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); memset(s, 0, sizeof(*s)); /* Round up to actual allocation */ -- GitLab From cf3c8916866ca47f569dc1b92c1c78a6311cb34f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Jun 2023 16:38:47 +0800 Subject: [PATCH 0165/1418] xfrm: Use xfrm_state selector for BEET input commit 842665a9008a53ff13ac22a4e4b8ae2f10e92aca upstream. For BEET the inner address and therefore family is stored in the xfrm_state selector. Use that when decapsulating an input packet instead of incorrectly relying on a non-existent tunnel protocol. Fixes: 5f24f41e8ea6 ("xfrm: Remove inner/outer modes from input path") Reported-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index deda4955c0466..c98ec262b45e9 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -331,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, { switch (x->props.mode) { case XFRM_MODE_BEET: - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - case IPPROTO_IPV6: + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); } break; -- GitLab From 0a371ed6f2c105951ca723f18dbec6bd95204962 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 4 Jul 2023 08:53:49 +0800 Subject: [PATCH 0166/1418] xfrm: Silence warnings triggerable by bad packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 57010b8ece2821a1fdfdba2197d14a022f3769db upstream. After the elimination of inner modes, a couple of warnings that were previously unreachable can now be triggered by malformed inbound packets. Fix this by: 1. Moving the setting of skb->protocol into the decap functions. 2. Returning -EINVAL when unexpected protocol is seen. Reported-by: Maciej Żenczykowski Fixes: 5f24f41e8ea6 ("xfrm: Remove inner/outer modes from input path") Signed-off-by: Herbert Xu Reviewed-by: Maciej Żenczykowski Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index c98ec262b45e9..d0320e35accbf 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int optlen = 0; int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { struct ip_beet_phdr *ph; int phlen; @@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; + skb->protocol = htons(ETH_P_IPV6); + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int size = sizeof(struct ipv6hdr); int err; + skb->protocol = htons(ETH_P_IPV6); + err = skb_cow_head(skb, size + skb->mac_len); if (err) goto out; @@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, return xfrm6_remove_tunnel_encap(x, skb); break; } + return -EINVAL; } WARN_ON_ONCE(1); @@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: - skb->protocol = htons(ETH_P_IP); - break; - case IPPROTO_IPV6: - skb->protocol = htons(ETH_P_IPV6); - break; - default: - WARN_ON_ONCE(1); - break; - } - return xfrm_inner_mode_encap_remove(x, skb); } -- GitLab From 944900fe2736c07288efe2d9394db4d3ca23f2c9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 22 Nov 2023 22:44:47 +0100 Subject: [PATCH 0167/1418] tls: fix NULL deref on tls_sw_splice_eof() with empty record commit 53f2cb491b500897a619ff6abd72f565933760f0 upstream. syzkaller discovered that if tls_sw_splice_eof() is executed as part of sendfile() when the plaintext/ciphertext sk_msg are empty, the send path gets confused because the empty ciphertext buffer does not have enough space for the encryption overhead. This causes tls_push_record() to go on the `split = true` path (which is only supposed to be used when interacting with an attached BPF program), and then get further confused and hit the tls_merge_open_record() path, which then assumes that there must be at least one populated buffer element, leading to a NULL deref. It is possible to have empty plaintext/ciphertext buffers if we previously bailed from tls_sw_sendmsg_locked() via the tls_trim_both_msgs() path. tls_sw_push_pending_record() already handles this case correctly; let's do the same check in tls_sw_splice_eof(). Fixes: df720d288dbb ("tls/sw: Use splice_eof() to flush") Cc: stable@vger.kernel.org Reported-by: syzbot+40d43509a099ea756317@syzkaller.appspotmail.com Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20231122214447.675768-1-jannh@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 09d258bb2df75..c8cbdd02a784e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1179,11 +1179,14 @@ void tls_sw_splice_eof(struct socket *sock) lock_sock(sk); retry: + /* same checks as in tls_sw_push_pending_record() */ rec = ctx->open_rec; if (!rec) goto unlock; msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; /* Check the BPF advisor and perform transmission. */ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, -- GitLab From 84df059d24680ebf93001328eda06d3c60ae2b58 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 22 Jan 2024 12:05:54 +0000 Subject: [PATCH 0168/1418] selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory [ Upstream commit d021b442cf312664811783e92b3d5e4548e92a53 ] ksm_tests was previously mmapping a region of memory, aligning the returned pointer to a PMD boundary, then setting MADV_HUGEPAGE, but was setting it past the end of the mmapped area due to not taking the pointer alignment into consideration. Fix this behaviour. Up until commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries"), this buggy behavior was (usually) masked because the alignment difference was always less than PMD-size. But since the mentioned commit, `ksm_tests -H -s 100` started failing. Link: https://lkml.kernel.org/r/20240122120554.3108022-1-ryan.roberts@arm.com Fixes: 325254899684 ("selftests: vm: add KSM huge pages merging time test") Signed-off-by: Ryan Roberts Cc: Pedro Demarchi Gomes Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/ksm_tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be2350fa3..a811659307855 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -470,7 +470,7 @@ static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t m if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); -- GitLab From 9a163479ddc45e42a1c20cb48a82d7dc11644c6f Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Fri, 19 Jan 2024 15:58:01 -0500 Subject: [PATCH 0169/1418] selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag [ Upstream commit 52e63d67b5bb423b33d7a262ac7f8bd375a90145 ] In order for the page table level 5 to be in use, the CPU must have the setting enabled in addition to the CONFIG option. Check for the flag to be set to avoid false test failures on systems that do not have this cpu flag set. The test does a series of mmap calls including three using the MAP_FIXED flag and specifying an address that is 1<<47 or 1<<48. These addresses are only available if you are using level 5 page tables, which requires both the CPU to have the capabiltiy (la57 flag) and the kernel to be configured. Currently the test only checks for the kernel configuration option, so this test can still report a false positive. Here are the three failing lines: $ ./va_high_addr_switch | grep FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(HIGH_ADDR, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED I thought (for about a second) refactoring the test so that these three mmap calls will only be run on systems with the level 5 page tables available, but the whole point of the test is to check the level 5 feature... Link: https://lkml.kernel.org/r/20240119205801.62769-1-audra@redhat.com Fixes: 4f2930c6718a ("selftests/vm: only run 128TBswitch with 5-level paging") Signed-off-by: Audra Mitchell Cc: Rafael Aquini Cc: Shuah Khan Cc: Adam Sindelar Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/va_128TBswitch.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh index 41580751dc511..231622b3a2327 100755 --- a/tools/testing/selftests/vm/va_128TBswitch.sh +++ b/tools/testing/selftests/vm/va_128TBswitch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } -- GitLab From 4bf19cef220aaff4b026143518916355806006e7 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 8 Nov 2023 10:22:15 -0800 Subject: [PATCH 0170/1418] md: bypass block throttle for superblock update [ Upstream commit d6e035aad6c09991da1c667fb83419329a3baed8 ] commit 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d") introduced a hung bug and will be reverted in next patch, since the issue that commit is fixing is due to md superblock write is throttled by wbt, to fix it, we can have superblock write bypass block layer throttle. Fixes: 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d") Cc: stable@vger.kernel.org # v5.19+ Suggested-by: Yu Kuai Signed-off-by: Junxiao Bi Reviewed-by: Logan Gunthorpe Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231108182216.73611-1-junxiao.bi@oracle.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3ccf1920682cb..c7efe15229514 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -963,9 +963,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, return; bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev, - 1, - REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA, - GFP_NOIO, &mddev->sync_set); + 1, + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META + | REQ_PREFLUSH | REQ_FUA, + GFP_NOIO, &mddev->sync_set); atomic_inc(&rdev->nr_pending); -- GitLab From 5447e64acce87bec0f686da582f1c2346555a625 Mon Sep 17 00:00:00 2001 From: Andrejs Cainikovs Date: Fri, 20 Oct 2023 17:30:22 +0200 Subject: [PATCH 0171/1418] ARM: dts: imx6q-apalis: add can power-up delay on ixora board [ Upstream commit b76bbf835d8945080b22b52fc1e6f41cde06865d ] Newer variants of Ixora boards require a power-up delay when powering up the CAN transceiver of up to 1ms. Cc: stable@vger.kernel.org Signed-off-by: Andrejs Cainikovs Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts index f9f7d99bd4db8..76f3e07bc8826 100644 --- a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts +++ b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts @@ -76,6 +76,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can1_power>; regulator-name = "can1_supply"; + startup-delay-us = <1000>; }; reg_can2_supply: regulator-can2-supply { @@ -85,6 +86,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can2_power>; regulator-name = "can2_supply"; + startup-delay-us = <1000>; }; }; -- GitLab From 48b348232070035410d8a58ff5ee18c221785cdd Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 27 Jan 2023 15:02:00 +0100 Subject: [PATCH 0172/1418] wifi: mwifiex: Support SD8978 chipset [ Upstream commit bba047f15851c8b053221f1b276eb7682d59f755 ] The Marvell SD8978 (aka NXP IW416) uses identical registers as SD8987, so reuse the existing mwifiex_reg_sd8987 definition. Note that mwifiex_reg_sd8977 and mwifiex_reg_sd8997 are likewise identical, save for the fw_dump_ctrl register: They define it as 0xf0 whereas mwifiex_reg_sd8987 defines it as 0xf9. I've verified that 0xf9 is the correct value on SD8978. NXP's out-of-tree driver uses 0xf9 for all of them, so there's a chance that 0xf0 is not correct in the mwifiex_reg_sd8977 and mwifiex_reg_sd8997 definitions. I cannot test that for lack of hardware, hence am leaving it as is. NXP has only released a firmware which runs Bluetooth over UART. Perhaps Bluetooth over SDIO is unsupported by this chipset. Consequently, only an "sdiouart" firmware image is referenced, not an alternative "sdsd" image. Signed-off-by: Lukas Wunner Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/536b4f17a72ca460ad1b07045757043fb0778988.1674827105.git.lukas@wunner.de Stable-dep-of: 1c5d463c0770 ("wifi: mwifiex: add extra delay for firmware ready") Signed-off-by: Sasha Levin --- .../bindings/net/wireless/marvell-8xxx.txt | 4 ++- drivers/net/wireless/marvell/mwifiex/Kconfig | 5 ++-- drivers/net/wireless/marvell/mwifiex/sdio.c | 25 +++++++++++++++++-- drivers/net/wireless/marvell/mwifiex/sdio.h | 1 + include/linux/mmc/sdio_ids.h | 1 + 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt index 9bf9bbac16e25..cdc303caf5f45 100644 --- a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt +++ b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt @@ -1,4 +1,4 @@ -Marvell 8787/8897/8997 (sd8787/sd8897/sd8997/pcie8997) SDIO/PCIE devices +Marvell 8787/8897/8978/8997 (sd8787/sd8897/sd8978/sd8997/pcie8997) SDIO/PCIE devices ------ This node provides properties for controlling the Marvell SDIO/PCIE wireless device. @@ -10,7 +10,9 @@ Required properties: - compatible : should be one of the following: * "marvell,sd8787" * "marvell,sd8897" + * "marvell,sd8978" * "marvell,sd8997" + * "nxp,iw416" * "pci11ab,2b42" * "pci1b4b,2b42" diff --git a/drivers/net/wireless/marvell/mwifiex/Kconfig b/drivers/net/wireless/marvell/mwifiex/Kconfig index 2b4ff2b78a7e1..b182f7155d66f 100644 --- a/drivers/net/wireless/marvell/mwifiex/Kconfig +++ b/drivers/net/wireless/marvell/mwifiex/Kconfig @@ -10,13 +10,14 @@ config MWIFIEX mwifiex. config MWIFIEX_SDIO - tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8987/SD8997" + tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8978/SD8987/SD8997" depends on MWIFIEX && MMC select FW_LOADER select WANT_DEV_COREDUMP help This adds support for wireless adapters based on Marvell - 8786/8787/8797/8887/8897/8977/8987/8997 chipsets with SDIO interface. + 8786/8787/8797/8887/8897/8977/8978/8987/8997 chipsets with + SDIO interface. SD8978 is also known as NXP IW416. If you choose to build it as a module, it will be called mwifiex_sdio. diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index ea1c1c2412e72..a24bd40dd41ab 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -263,7 +263,7 @@ static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8887 = { 0x68, 0x69, 0x6a}, }; -static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8987 = { +static const struct mwifiex_sdio_card_reg mwifiex_reg_sd89xx = { .start_rd_port = 0, .start_wr_port = 0, .base_0_reg = 0xF8, @@ -394,6 +394,22 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { .can_ext_scan = true, }; +static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { + .firmware_sdiouart = SD8978_SDIOUART_FW_NAME, + .reg = &mwifiex_reg_sd89xx, + .max_ports = 32, + .mp_agg_pkt_limit = 16, + .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, + .mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .supports_sdio_new_mode = true, + .has_control_mask = false, + .can_dump_fw = true, + .fw_dump_enh = true, + .can_auto_tdls = false, + .can_ext_scan = true, +}; + static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .firmware = SD8997_DEFAULT_FW_NAME, .firmware_sdiouart = SD8997_SDIOUART_FW_NAME, @@ -428,7 +444,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .firmware = SD8987_DEFAULT_FW_NAME, - .reg = &mwifiex_reg_sd8987, + .reg = &mwifiex_reg_sd89xx, .max_ports = 32, .mp_agg_pkt_limit = 16, .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K, @@ -482,7 +498,9 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = { { .compatible = "marvell,sd8787" }, { .compatible = "marvell,sd8897" }, + { .compatible = "marvell,sd8978" }, { .compatible = "marvell,sd8997" }, + { .compatible = "nxp,iw416" }, { } }; @@ -920,6 +938,8 @@ static const struct sdio_device_id mwifiex_ids[] = { .driver_data = (unsigned long)&mwifiex_sdio_sd8801}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8977}, + {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8978_WLAN), + .driver_data = (unsigned long)&mwifiex_sdio_sd8978}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8987}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997_WLAN), @@ -3164,6 +3184,7 @@ MODULE_FIRMWARE(SD8797_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8897_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8887_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8977_DEFAULT_FW_NAME); +MODULE_FIRMWARE(SD8978_SDIOUART_FW_NAME); MODULE_FIRMWARE(SD8987_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_SDIOUART_FW_NAME); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index 3a24bb48b2996..ae94c172310ff 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -25,6 +25,7 @@ #define SD8887_DEFAULT_FW_NAME "mrvl/sd8887_uapsta.bin" #define SD8801_DEFAULT_FW_NAME "mrvl/sd8801_uapsta.bin" #define SD8977_DEFAULT_FW_NAME "mrvl/sdsd8977_combo_v2.bin" +#define SD8978_SDIOUART_FW_NAME "mrvl/sdiouartiw416_combo_v0.bin" #define SD8987_DEFAULT_FW_NAME "mrvl/sd8987_uapsta.bin" #define SD8997_DEFAULT_FW_NAME "mrvl/sdsd8997_combo_v4.bin" #define SD8997_SDIOUART_FW_NAME "mrvl/sdiouart8997_combo_v4.bin" diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 74f9d9a6d3307..0e4ef9c5127ad 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -102,6 +102,7 @@ #define SDIO_DEVICE_ID_MARVELL_8977_BT 0x9146 #define SDIO_DEVICE_ID_MARVELL_8987_WLAN 0x9149 #define SDIO_DEVICE_ID_MARVELL_8987_BT 0x914a +#define SDIO_DEVICE_ID_MARVELL_8978_WLAN 0x9159 #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 -- GitLab From 1b7b597a69bba6b0dec27845e5935e090b7c084c Mon Sep 17 00:00:00 2001 From: David Lin Date: Sat, 9 Dec 2023 07:40:29 +0800 Subject: [PATCH 0173/1418] wifi: mwifiex: add extra delay for firmware ready [ Upstream commit 1c5d463c0770c6fa2037511a24fb17966fd07d97 ] For SDIO IW416, due to a bug, FW may return ready before complete full initialization. Command timeout may occur at driver load after reboot. Workaround by adding 100ms delay at checking FW status. Signed-off-by: David Lin Cc: stable@vger.kernel.org Reviewed-by: Francesco Dolcini Acked-by: Brian Norris Tested-by: Marcel Ziswiler # Verdin AM62 (IW416) Signed-off-by: Kalle Valo Link: https://msgid.link/20231208234029.2197-1-yu-hao.lin@nxp.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/sdio.c | 19 +++++++++++++++++++ drivers/net/wireless/marvell/mwifiex/sdio.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index a24bd40dd41ab..e55747b50dbfb 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -331,6 +331,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8786 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = false, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { @@ -346,6 +347,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { @@ -361,6 +363,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { @@ -376,6 +379,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { .can_dump_fw = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { @@ -392,6 +396,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { @@ -408,6 +413,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = true, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { @@ -425,6 +431,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { @@ -440,6 +447,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { .can_dump_fw = false, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { @@ -456,6 +464,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .fw_dump_enh = true, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { @@ -471,6 +480,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static struct memory_type_mapping generic_mem_type_map[] = { @@ -563,6 +573,7 @@ mwifiex_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id) card->fw_dump_enh = data->fw_dump_enh; card->can_auto_tdls = data->can_auto_tdls; card->can_ext_scan = data->can_ext_scan; + card->fw_ready_extra_delay = data->fw_ready_extra_delay; INIT_WORK(&card->work, mwifiex_sdio_work); } @@ -766,6 +777,7 @@ mwifiex_sdio_read_fw_status(struct mwifiex_adapter *adapter, u16 *dat) static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) { + struct sdio_mmc_card *card = adapter->card; int ret = 0; u16 firmware_stat; u32 tries; @@ -783,6 +795,13 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, ret = -1; } + if (card->fw_ready_extra_delay && + firmware_stat == FIRMWARE_READY_SDIO) + /* firmware might pretend to be ready, when it's not. + * Wait a little bit more as a workaround. + */ + msleep(100); + return ret; } diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index ae94c172310ff..a5112cb35cdcd 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -258,6 +258,7 @@ struct sdio_mmc_card { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; struct mwifiex_sdio_mpa_tx mpa_tx; struct mwifiex_sdio_mpa_rx mpa_rx; @@ -281,6 +282,7 @@ struct mwifiex_sdio_device { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; }; /* -- GitLab From 475369350157844dde3f9065591c95ec16aa64a9 Mon Sep 17 00:00:00 2001 From: Sjoerd Simons Date: Tue, 28 Nov 2023 22:35:05 +0100 Subject: [PATCH 0174/1418] bus: moxtet: Add spi device table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aaafe88d5500ba18b33be72458439367ef878788 ] The moxtet module fails to auto-load on. Add a SPI id table to allow it to do so. Signed-off-by: Sjoerd Simons Cc: Reviewed-by: Marek Behún Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- drivers/bus/moxtet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 5eb0fe73ddc45..79fc96c8d8364 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -830,6 +830,12 @@ static void moxtet_remove(struct spi_device *spi) mutex_destroy(&moxtet->lock); } +static const struct spi_device_id moxtet_spi_ids[] = { + { "moxtet" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, moxtet_spi_ids); + static const struct of_device_id moxtet_dt_ids[] = { { .compatible = "cznic,moxtet" }, {}, @@ -841,6 +847,7 @@ static struct spi_driver moxtet_spi_driver = { .name = "moxtet", .of_match_table = moxtet_dt_ids, }, + .id_table = moxtet_spi_ids, .probe = moxtet_probe, .remove = moxtet_remove, }; -- GitLab From 9c84d580de3c6f816ab43373e21e421e5687e52e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sat, 7 Jan 2023 12:09:57 +0100 Subject: [PATCH 0175/1418] arm64: dts: qcom: msm8916: Enable blsp_dma by default [ Upstream commit 0154d3594af3c198532ac7b4ab70f50fb5207a15 ] Adding the "dmas" to the I2C controllers prevents probing them if blsp_dma is disabled (infinite probe deferral). Avoid this by enabling blsp_dma by default - it's an integral part of the SoC that is almost always used (even if just for UART). Signed-off-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230107110958.5762-2-stephan@gerhold.net Stable-dep-of: 7c45b6ddbcff ("arm64: dts: qcom: msm8916: Make blsp_dma controlled-remotely") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/apq8016-sbc.dts | 4 ---- arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 - 2 files changed, 5 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts index 9d116e1fbe10c..1ac4f8c24e231 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts @@ -169,10 +169,6 @@ }; }; -&blsp_dma { - status = "okay"; -}; - &blsp_i2c2 { /* On Low speed expansion */ status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index bafac2cf7e3d6..f0d097ade84c5 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1522,7 +1522,6 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; - status = "disabled"; }; blsp1_uart1: serial@78af000 { -- GitLab From 2488e0e4bc23327af7fbd42a0a9743ca16b6081a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 4 Dec 2023 11:21:20 +0100 Subject: [PATCH 0176/1418] arm64: dts: qcom: msm8916: Make blsp_dma controlled-remotely [ Upstream commit 7c45b6ddbcff01f9934d11802010cfeb0879e693 ] The blsp_dma controller is shared between the different subsystems, which is why it is already initialized by the firmware. We should not reinitialize it from Linux to avoid potential other users of the DMA engine to misbehave. In mainline this can be described using the "qcom,controlled-remotely" property. In the downstream/vendor kernel from Qualcomm there is an opposite "qcom,managed-locally" property. This property is *not* set for the qcom,sps-dma@7884000 [1] so adding "qcom,controlled-remotely" upstream matches the behavior of the downstream/vendor kernel. Adding this seems to fix some weird issues with UART where both input/output becomes garbled with certain obscure firmware versions on some devices. [1]: https://git.codelinaro.org/clo/la/kernel/msm-3.10/-/blob/LA.BR.1.2.9.1-02310-8x16.0/arch/arm/boot/dts/qcom/msm8916.dtsi#L1466-1472 Cc: stable@vger.kernel.org # 6.5 Fixes: a0e5fb103150 ("arm64: dts: qcom: Add msm8916 BLSP device nodes") Signed-off-by: Stephan Gerhold Reviewed-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20231204-msm8916-blsp-dma-remote-v1-1-3e49c8838c8d@gerhold.net Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index f0d097ade84c5..987cebbda0571 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1522,6 +1522,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; + qcom,controlled-remotely; }; blsp1_uart1: serial@78af000 { -- GitLab From c0e41c8756eff4a1d2101b58e436237cd7e0a31b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:34:01 +0100 Subject: [PATCH 0177/1418] arm64: dts: qcom: sdm845: fix USB SS wakeup [ Upstream commit 971f5d8b0618d09db75184ddd8cca0767514db5d ] The USB SS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states. Fixes: ca4db2b538a1 ("arm64: dts: qcom: sdm845: Add USB-related nodes") Cc: stable@vger.kernel.org # 4.20 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231213173403.29544-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 4d5905ef0b411..95c515da9f2e0 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4049,7 +4049,7 @@ assigned-clock-rates = <19200000>, <150000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>, <&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>, <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", @@ -4100,7 +4100,7 @@ assigned-clock-rates = <19200000>, <150000000>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>, <&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", -- GitLab From 0a9e803549111f9fd259f4fb68fba6cd3090c6eb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:34:03 +0100 Subject: [PATCH 0178/1418] arm64: dts: qcom: sm8150: fix USB SS wakeup [ Upstream commit cc4e1da491b84ca05339a19893884cda78f74aef ] The USB SS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states. Fixes: 0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes") Fixes: b33d2868e8d3 ("arm64: dts: qcom: sm8150: Add USB and PHY device nodes") Cc: stable@vger.kernel.org # 5.10 Cc: Jack Pham Cc: Jonathan Marek Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231213173403.29544-6-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 8efd0e227d780..eb1a9369926d2 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3629,7 +3629,7 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, <&pdc 8 IRQ_TYPE_EDGE_BOTH>, <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", @@ -3678,7 +3678,7 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, <&pdc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", -- GitLab From c6feb7f41728d3a830ed244a730d932aad590db4 Mon Sep 17 00:00:00 2001 From: David Lin Date: Thu, 21 Dec 2023 09:55:11 +0800 Subject: [PATCH 0179/1418] wifi: mwifiex: fix uninitialized firmware_stat [ Upstream commit 3df95e265924ac898c1a38a0c01846dd0bd3b354 ] Variable firmware_stat is possible to be used without initialization. Signed-off-by: David Lin Fixes: 1c5d463c0770 ("wifi: mwifiex: add extra delay for firmware ready") Cc: stable@vger.kernel.org Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202312192236.ZflaWYCw-lkp@intel.com/ Acked-by: Brian Norris Signed-off-by: Kalle Valo Link: https://msgid.link/20231221015511.1032128-1-yu-hao.lin@nxp.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index e55747b50dbfb..2c9b70e9a7263 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -779,7 +779,7 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, { struct sdio_mmc_card *card = adapter->card; int ret = 0; - u16 firmware_stat; + u16 firmware_stat = 0; u32 tries; for (tries = 0; tries < poll_num; tries++) { -- GitLab From bb44477d4506e52785693a39f03cdc6a2c5e8598 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 14 Dec 2023 11:08:34 +0800 Subject: [PATCH 0180/1418] crypto: lib/mpi - Fix unexpected pointer access in mpi_ec_init [ Upstream commit ba3c5574203034781ac4231acf117da917efcd2a ] When the mpi_ec_ctx structure is initialized, some fields are not cleared, causing a crash when referencing the field when the structure was released. Initially, this issue was ignored because memory for mpi_ec_ctx is allocated with the __GFP_ZERO flag. For example, this error will be triggered when calculating the Za value for SM2 separately. Fixes: d58bb7e55a8a ("lib/mpi: Introduce ec implementation to MPI library") Cc: stable@vger.kernel.org # v6.5 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- lib/mpi/ec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index 40f5908e57a4f..e16dca1e23d52 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); -- GitLab From e7d2e87abc6fddec0e661f9fd8c1657db42d674b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 10 Jan 2024 18:29:42 +0900 Subject: [PATCH 0181/1418] block: fix partial zone append completion handling in req_bio_endio() [ Upstream commit 748dc0b65ec2b4b7b3dbd7befcc4a54fdcac7988 ] Partial completions of zone append request is not allowed but if a zone append completion indicates a number of completed bytes different from the original BIO size, only the BIO status is set to error. This leads to bio_advance() not setting the BIO size to 0 and thus to not call bio_endio() at the end of req_bio_endio(). Make sure a partially completed zone append is failed and completed immediately by forcing the completed number of bytes (nbytes) to be equal to the BIO size, thus ensuring that bio_endio() is called. Fixes: 297db731847e ("block: fix req_bio_endio append error handling") Cc: stable@kernel.vger.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240110092942.442334-1-dlemoal@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c07e5eebcbd85..7ed6b9469f979 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -747,11 +747,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. + * For such case, force the completed nbytes to be equal to + * the BIO size so that bio_advance() sets the BIO remaining + * size to 0 and we end up calling bio_endio() before returning. */ - if (bio->bi_iter.bi_size != nbytes) + if (bio->bi_iter.bi_size != nbytes) { bio->bi_status = BLK_STS_IOERR; - else + nbytes = bio->bi_iter.bi_size; + } else { bio->bi_iter.bi_sector = rq->__sector; + } } bio_advance(bio, nbytes); -- GitLab From 653bc5e6d9995d7d5f497c665b321875a626161c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 29 Jan 2024 10:57:01 +0100 Subject: [PATCH 0182/1418] netfilter: ipset: fix performance regression in swap operation commit 97f7cf1cd80eeed3b7c808b7c12463295c751001 upstream. The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/ipset/ip_set.h | 4 +++ net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++-- net/netfilter/ipset/ip_set_core.c | 37 +++++++++++++++++++------ net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++-- net/netfilter/ipset/ip_set_list_set.c | 13 +++++++-- 5 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 72f5ebc5c97a9..0b217d4ae2a48 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 26ab0e9612d82..9523104a90da4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -28,6 +28,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d47dfdcb899b0..bddd1131e825a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1184,6 +1184,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1195,8 +1203,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1208,8 +1214,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1223,6 +1231,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1230,6 +1240,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1240,10 +1253,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1396,9 +1415,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2411,8 +2427,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7499192af5866..48fda58b91a0e 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -210,6 +210,7 @@ htable_size(u8 hbits) #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -254,6 +255,7 @@ htable_size(u8 hbits) #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -437,9 +439,6 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); list_for_each_safe(l, lt, &h->ad) { list_del(l); @@ -586,6 +585,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1416,6 +1424,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 5a67f79665742..6bc7019982b05 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void -- GitLab From ebc442c6403d9b3945f9d40e582677231ab23f45 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 4 Feb 2024 16:26:42 +0100 Subject: [PATCH 0183/1418] netfilter: ipset: Missing gc cancellations fixed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 27c5a095e2518975e20a10102908ae8231699879 upstream. The patch fdb8e12cc2cc ("netfilter: ipset: fix performance regression in swap operation") missed to add the calls to gc cancellations at the error path of create operations and at module unload. Also, because the half of the destroy operations now executed by a function registered by call_rcu(), neither NFNL_SUBSYS_IPSET mutex or rcu read lock is held and therefore the checking of them results false warnings. Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation") Reported-by: syzbot+52bbc0ad036f6f0d4a25@syzkaller.appspotmail.com Reported-by: Brad Spengler Reported-by: Стас Ничипорович Tested-by: Brad Spengler Tested-by: Стас Ничипорович Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_core.c | 2 ++ net/netfilter/ipset/ip_set_hash_gen.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bddd1131e825a..f645da82d826e 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1156,6 +1156,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -2380,6 +2381,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 48fda58b91a0e..ef04e556aadb4 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -419,7 +419,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -439,7 +439,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); -- GitLab From 23027309b099ffc4efca5477009a11dccbdae592 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Jan 2024 15:29:27 +0100 Subject: [PATCH 0184/1418] parisc: Fix random data corruption from exception handler commit 8b1d72395635af45410b66cc4c4ab37a12c4a831 upstream. The current exception handler implementation, which assists when accessing user space memory, may exhibit random data corruption if the compiler decides to use a different register than the specified register %r29 (defined in ASM_EXCEPTIONTABLE_REG) for the error code. If the compiler choose another register, the fault handler will nevertheless store -EFAULT into %r29 and thus trash whatever this register is used for. Looking at the assembly I found that this happens sometimes in emulate_ldd(). To solve the issue, the easiest solution would be if it somehow is possible to tell the fault handler which register is used to hold the error code. Using %0 or %1 in the inline assembly is not posssible as it will show up as e.g. %r29 (with the "%r" prefix), which the GNU assembler can not convert to an integer. This patch takes another, better and more flexible approach: We extend the __ex_table (which is out of the execution path) by one 32-word. In this word we tell the compiler to insert the assembler instruction "or %r0,%r0,%reg", where %reg references the register which the compiler choosed for the error return code. In case of an access failure, the fault handler finds the __ex_table entry and can examine the opcode. The used register is encoded in the lowest 5 bits, and the fault handler can then store -EFAULT into this register. Since we extend the __ex_table to 3 words we can't use the BUILDTIME_TABLE_SORT config option any longer. Signed-off-by: Helge Deller Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/assembly.h | 1 + arch/parisc/include/asm/extable.h | 64 +++++++++++++++++++++++++ arch/parisc/include/asm/special_insns.h | 6 ++- arch/parisc/include/asm/uaccess.h | 48 +++---------------- arch/parisc/kernel/unaligned.c | 44 ++++++++--------- arch/parisc/mm/fault.c | 11 +++-- 7 files changed, 106 insertions(+), 69 deletions(-) create mode 100644 arch/parisc/include/asm/extable.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 345d5e021484c..abf39ecda6fb1 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -24,7 +24,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_KERNEL_BZIP2 diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759da..5937d5edaba1e 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 0000000000000..4ea23e3d79dc9 --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include +#include + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c6..51f40eaf77806 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9e..88d0ae5769dde 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include #include +#include #include #include @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index e8a4d77cff53a..8a8e7d7224a26 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -118,8 +118,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -150,8 +150,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -187,8 +187,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -207,9 +207,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -242,8 +242,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -283,8 +283,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -327,10 +327,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -356,11 +356,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index b00aa98b582c2..fbd9ada5e527e 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { -- GitLab From e4cf8941664cae2f89f0189c29fe2ce8c6be0d03 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Jan 2024 14:58:16 +1100 Subject: [PATCH 0185/1418] nfsd: fix RELEASE_LOCKOWNER [ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ] The test on so_count in nfsd4_release_lockowner() is nonsense and harmful. Revert to using check_for_locks(), changing that to not sleep. First: harmful. As is documented in the kdoc comment for nfsd4_release_lockowner(), the test on so_count can transiently return a false positive resulting in a return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is clearly a protocol violation and with the Linux NFS client it can cause incorrect behaviour. If RELEASE_LOCKOWNER is sent while some other thread is still processing a LOCK request which failed because, at the time that request was received, the given owner held a conflicting lock, then the nfsd thread processing that LOCK request can hold a reference (conflock) to the lock owner that causes nfsd4_release_lockowner() to return an incorrect error. The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so it knows that the error is impossible. It assumes the lock owner was in fact released so it feels free to use the same lock owner identifier in some later locking request. When it does reuse a lock owner identifier for which a previous RELEASE failed, it will naturally use a lock_seqid of zero. However the server, which didn't release the lock owner, will expect a larger lock_seqid and so will respond with NFS4ERR_BAD_SEQID. So clearly it is harmful to allow a false positive, which testing so_count allows. The test is nonsense because ... well... it doesn't mean anything. so_count is the sum of three different counts. 1/ the set of states listed on so_stateids 2/ the set of active vfs locks owned by any of those states 3/ various transient counts such as for conflicting locks. When it is tested against '2' it is clear that one of these is the transient reference obtained by find_lockowner_str_locked(). It is not clear what the other one is expected to be. In practice, the count is often 2 because there is precisely one state on so_stateids. If there were more, this would fail. In my testing I see two circumstances when RELEASE_LOCKOWNER is called. In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in all the lock states being removed, and so the lockowner being discarded (it is removed when there are no more references which usually happens when the lock state is discarded). When nfsd4_release_lockowner() finds that the lock owner doesn't exist, it returns success. The other case shows an so_count of '2' and precisely one state listed in so_stateid. It appears that the Linux client uses a separate lock owner for each file resulting in one lock state per lock owner, so this test on '2' is safe. For another client it might not be safe. So this patch changes check_for_locks() to use the (newish) find_any_file_locked() so that it doesn't take a reference on the nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With this check is it safe to restore the use of check_for_locks() rather than testing so_count against the mysterious '2'. Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org # v6.2+ Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index faecdbfa01a29..0443fe4e29e11 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7736,14 +7736,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = locks_inode(nf->nf_file); @@ -7759,7 +7761,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } @@ -7769,10 +7772,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * The lockowner's so_count is bumped when a lock record is added - * or when copying a conflicting lock. The latter case is brief, - * but can lead to fleeting false positives when looking for - * locks-in-use. + * Check if theree are any locks still held and if not - free the lockowner + * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found @@ -7808,10 +7809,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); return nfs_ok; } - if (atomic_read(&lo->lo_owner.so_count) != 2) { - spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); - return nfserr_locks_held; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; + } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { -- GitLab From 51a8f31b939c21994f43e2d01e1a97719c8685df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Feb 2024 13:22:39 +1100 Subject: [PATCH 0186/1418] nfsd: don't take fi_lock in nfsd_break_deleg_cb() [ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ] A recent change to check_for_locks() changed it to take ->flc_lock while holding ->fi_lock. This creates a lock inversion (reported by lockdep) because there is a case where ->fi_lock is taken while holding ->flc_lock. ->flc_lock is held across ->fl_lmops callbacks, and nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However it doesn't need to. Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list and so needed the lock. Since then it doesn't walk the list and doesn't need the lock. Two actions are performed under the lock. One is to call nfsd_break_one_deleg which calls nfsd4_run_cb(). These doesn't act on the nfs4_file at all, so don't need the lock. The other is to set ->fi_had_conflict which is in the nfs4_file. This field is only ever set here (except when initialised to false) so there is no possible problem will multiple threads racing when setting it. The field is tested twice in nfs4_set_delegation(). The first test does not hold a lock and is documented as an opportunistic optimisation, so it doesn't impose any need to hold ->fi_lock while setting ->fi_had_conflict. The second test in nfs4_set_delegation() *is* make under ->fi_lock, so removing the locking when ->fi_had_conflict is set could make a change. The change could only be interesting if ->fi_had_conflict tested as false even though nfsd_break_one_deleg() ran before ->fi_lock was unlocked. i.e. while hash_delegation_locked() was running. As hash_delegation_lock() doesn't interact in any way with nfs4_run_cb() there can be no importance to this interaction. So this patch removes the locking from nfsd_break_one_deleg() and moves the final test on ->fi_had_conflict out of the locked region to make it clear that locking isn't important to the test. It is still tested *after* vfs_setlease() has succeeded. This might be significant and as vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called under ->flc_lock this "after" is a true ordering provided by a spinlock. Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0443fe4e29e11..b3f6dda930d8b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4908,10 +4908,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); return false; } @@ -5499,12 +5497,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); -- GitLab From e1c1bdaa387976cb577769a58abce5ef3e2bc88d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Jan 2023 09:32:06 -0800 Subject: [PATCH 0187/1418] hrtimer: Ignore slack time for RT tasks in schedule_hrtimeout_range() commit 0c52310f260014d95c1310364379772cb74cf82d upstream. While in theory the timer can be triggered before expires + delta, for the cases of RT tasks they really have no business giving any lenience for extra slack time, so override any passed value by the user and always use zero for schedule_hrtimeout_range() calls. Furthermore, this is similar to what the nanosleep(2) family already does with current->timer_slack_ns. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230123173206.6764-3-dave@stgolabs.net Signed-off-by: Felix Moessbauer Signed-off-by: Greg Kroah-Hartman --- kernel/time/hrtimer.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e0aff1d1ea4f..9bb88836c42e6 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2266,7 +2266,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2293,6 +2293,13 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } + /* + * Override any slack passed by the user if under + * rt contraints. + */ + if (rt_task(current)) + delta = 0; + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2312,7 +2319,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * * Make the current task sleep until the given expiry time has @@ -2320,7 +2327,8 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. + * actual wakeup to a time that is both power and performance friendly + * for regular (non RT/DL) tasks. * The kernel give the normal best effort behavior for "@expires+@delta", * but may decide to fire the timer earlier, but no earlier than @expires. * -- GitLab From 270325fb3154721f944fb9e1eca8fffc78d65f28 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:42 -0600 Subject: [PATCH 0188/1418] RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned commit 0a5ec366de7e94192669ba08de6ed336607fd282 upstream. The SQ is shared for between kernel and used by storing the kernel page pointer and passing that to a kmap_atomic(). This then requires that the alignment is PAGE_SIZE aligned. Fix by adding an iWarp specific alignment check. Fixes: e965ef0e7b2c ("RDMA/irdma: Split QP handler into irdma_reg_user_mr_type_qp") Link: https://lore.kernel.org/r/20231129202143.1434-3-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/irdma/verbs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 4859b99d54fc2..01faec6ea5285 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2845,6 +2845,13 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, switch (req.reg_type) { case IRDMA_MEMREG_TYPE_QP: + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) { + err = -EINVAL; + goto error; + } + total = req.sq_pages + req.rq_pages + shadow_pgcnt; if (total > iwmr->page_cnt) { err = -EINVAL; -- GitLab From 1ae3c59355dc9882e09c020afe8ffbd895ad0f29 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:41 -0300 Subject: [PATCH 0189/1418] smb: client: fix potential OOBs in smb2_parse_contexts() commit af1689a9b7701d9907dfc84d2a4b57c4bc907144 upstream. Validate offsets and lengths before dereferencing create contexts in smb2_parse_contexts(). This fixes following oops when accessing invalid create contexts from server: BUG: unable to handle page fault for address: ffff8881178d8cc3 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 4a01067 P4D 4a01067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1736 Comm: mount.cifs Not tainted 6.7.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:smb2_parse_contexts+0xa0/0x3a0 [cifs] Code: f8 10 75 13 48 b8 93 ad 25 50 9c b4 11 e7 49 39 06 0f 84 d2 00 00 00 8b 45 00 85 c0 74 61 41 29 c5 48 01 c5 41 83 fd 0f 76 55 <0f> b7 7d 04 0f b7 45 06 4c 8d 74 3d 00 66 83 f8 04 75 bc ba 04 00 RSP: 0018:ffffc900007939e0 EFLAGS: 00010216 RAX: ffffc90000793c78 RBX: ffff8880180cc000 RCX: ffffc90000793c90 RDX: ffffc90000793cc0 RSI: ffff8880178d8cc0 RDI: ffff8880180cc000 RBP: ffff8881178d8cbf R08: ffffc90000793c22 R09: 0000000000000000 R10: ffff8880180cc000 R11: 0000000000000024 R12: 0000000000000000 R13: 0000000000000020 R14: 0000000000000000 R15: ffffc90000793c22 FS: 00007f873753cbc0(0000) GS:ffff88806bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8881178d8cc3 CR3: 00000000181ca000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? search_module_extables+0x19/0x60 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x1b6/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? smb2_parse_contexts+0xa0/0x3a0 [cifs] SMB2_open+0x38d/0x5f0 [cifs] ? smb2_is_path_accessible+0x138/0x260 [cifs] smb2_is_path_accessible+0x138/0x260 [cifs] cifs_is_path_remote+0x8d/0x230 [cifs] cifs_mount+0x7e/0x350 [cifs] cifs_smb3_do_mount+0x128/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f8737657b1e Reported-by: Robert Morris Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French [Guru: Modified the patch to be applicable to the cached_dir.c file.] Signed-off-by: Guruswamy Basavaiah Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 8 ++-- fs/smb/client/smb2pdu.c | 93 +++++++++++++++++++++++--------------- fs/smb/client/smb2proto.h | 12 +++-- 3 files changed, 68 insertions(+), 45 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5a132c1e6f6c4..6f4d7aa70e5a2 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -268,10 +268,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) goto oshr_free; - smb2_parse_contexts(server, o_rsp, + rc = smb2_parse_contexts(server, rsp_iov, &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) + goto oshr_free; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) goto oshr_free; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e65f998ea4cfc..d610862ac6a05 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2145,17 +2145,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2164,45 +2165,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; + + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen >= doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } + break; + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ - - next = le32_to_cpu(cc->Next); - if (!next) + + off = le32_to_cpu(cc->Next); + if (!off) break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3082,8 +3101,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index be21b5d26f67e..b325fde010adc 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -249,11 +249,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); -- GitLab From 380aeff204b903502582019ff067caccbd3399b3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Jan 2024 01:08:26 -0300 Subject: [PATCH 0190/1418] smb: client: fix parsing of SMB3.1.1 POSIX create context commit 76025cc2285d9ede3d717fe4305d66f8be2d9346 upstream. The data offset for the SMB3.1.1 POSIX create context will always be 8-byte aligned so having the check 'noff + nlen >= doff' in smb2_parse_contexts() is wrong as it will lead to -EINVAL because noff + nlen == doff. Fix the sanity check to correctly handle aligned create context data. Fixes: af1689a9b770 ("smb: client: fix potential OOBs in smb2_parse_contexts()") Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Guruswamy Basavaiah Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index d610862ac6a05..c1fc1651d8b69 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2184,7 +2184,7 @@ int smb2_parse_contexts(struct TCP_Server_Info *server, noff = le16_to_cpu(cc->NameOffset); nlen = le16_to_cpu(cc->NameLength); - if (noff + nlen >= doff) + if (noff + nlen > doff) return -EINVAL; name = (char *)cc + noff; -- GitLab From 989b0ff35fe5fc9652ee5bafbe8483db6f27b137 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2023 16:46:21 +0000 Subject: [PATCH 0191/1418] net: prevent mss overflow in skb_segment() commit 23d05d563b7e7b0314e65c8e882bc27eac2da8e7 upstream. Once again syzbot is able to crash the kernel in skb_segment() [1] GSO_BY_FRAGS is a forbidden value, but unfortunately the following computation in skb_segment() can reach it quite easily : mss = mss * partial_segs; 65535 = 3 * 5 * 17 * 257, so many initial values of mss can lead to a bad final result. Make sure to limit segmentation so that the new mss value is smaller than GSO_BY_FRAGS. [1] general protection fault, probably for non-canonical address 0xdffffc000000000e: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] CPU: 1 PID: 5079 Comm: syz-executor993 Not tainted 6.7.0-rc4-syzkaller-00141-g1ae4cd3cbdd0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:skb_segment+0x181d/0x3f30 net/core/skbuff.c:4551 Code: 83 e3 02 e9 fb ed ff ff e8 90 68 1c f9 48 8b 84 24 f8 00 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 8a 21 00 00 48 8b 84 24 f8 00 RSP: 0018:ffffc900043473d0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000010046 RCX: ffffffff886b1597 RDX: 000000000000000e RSI: ffffffff886b2520 RDI: 0000000000000070 RBP: ffffc90004347578 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff888063202ac0 R13: 0000000000010000 R14: 000000000000ffff R15: 0000000000000046 FS: 0000555556e7e380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 0000000027ee2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: udp6_ufo_fragment+0xa0e/0xd00 net/ipv6/udp_offload.c:109 ipv6_gso_segment+0x534/0x17e0 net/ipv6/ip6_offload.c:120 skb_mac_gso_segment+0x290/0x610 net/core/gso.c:53 __skb_gso_segment+0x339/0x710 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x36c/0xeb0 net/core/dev.c:3626 __dev_queue_xmit+0x6f3/0x3d60 net/core/dev.c:4338 dev_queue_xmit include/linux/netdevice.h:3134 [inline] packet_xmit+0x257/0x380 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x24c6/0x5220 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f8692032aa9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 d1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff8d685418 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f8692032aa9 RDX: 0000000000010048 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 00000000000f4240 R08: 0000000020000540 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff8d685480 R13: 0000000000000001 R14: 00007fff8d685480 R15: 0000000000000003 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:skb_segment+0x181d/0x3f30 net/core/skbuff.c:4551 Code: 83 e3 02 e9 fb ed ff ff e8 90 68 1c f9 48 8b 84 24 f8 00 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 8a 21 00 00 48 8b 84 24 f8 00 RSP: 0018:ffffc900043473d0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000010046 RCX: ffffffff886b1597 RDX: 000000000000000e RSI: ffffffff886b2520 RDI: 0000000000000070 RBP: ffffc90004347578 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff888063202ac0 R13: 0000000000010000 R14: 000000000000ffff R15: 0000000000000046 FS: 0000555556e7e380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 0000000027ee2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231212164621.4131800-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a819d0a7bfb0..d4bd10f8723df 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4213,8 +4213,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. + * Cap len to not accidentally hit GSO_BY_FRAGS. */ - partial_segs = len / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss; if (partial_segs > 1) mss *= partial_segs; else -- GitLab From f7bbad9561f32dda2c13f6c4d0ca77d301f1c123 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:28 +0100 Subject: [PATCH 0192/1418] bpf: Add struct for bin_args arg in bpf_bprintf_prepare commit 78aa1cc9404399a15d2a1205329c6a06236f5378 upstream. Adding struct bpf_bprintf_data to hold bin_args argument for bpf_bprintf_prepare function. We will add another return argument to bpf_bprintf_prepare and pass the struct to bpf_bprintf_cleanup for proper cleanup in following changes. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-2-jolsa@kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 7 ++++++- kernel/bpf/helpers.c | 24 +++++++++++++----------- kernel/bpf/verifier.c | 3 ++- kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++-------------- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c04a61ffac8ae..0e4fb16cd5c1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2740,8 +2740,13 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +struct bpf_bprintf_data { + u32 *bin_args; + bool get_bin_args; +}; + int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); + u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(void); /* the implementation of the opaque uapi struct bpf_dynptr */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 34135fbd6097e..42141d59e9c67 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -795,16 +795,16 @@ void bpf_bprintf_cleanup(void) * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: - * - Format string verification only: when bin_args is NULL + * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in - * bin_args a binary representation of arguments usable by bstr_printf where - * pointers from BPF have been sanitized. + * data->bin_args a binary representation of arguments usable by bstr_printf + * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. */ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_args, u32 num_args) + u32 num_args, struct bpf_bprintf_data *data) { char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; size_t sizeof_cur_arg, sizeof_cur_ip; @@ -817,12 +817,12 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (bin_args) { + if (data->get_bin_args) { if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) return -EBUSY; tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; - *bin_args = (u32 *)tmp_buf; + data->bin_args = (u32 *)tmp_buf; } for (i = 0; i < fmt_size; i++) { @@ -1023,24 +1023,26 @@ out: } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; - err = bstr_printf(str, str_size, fmt, bin_args); + err = bstr_printf(str, str_size, fmt, data.bin_args); bpf_bprintf_cleanup(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 23b6d57b5eef2..1a29ac4db6eae 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7448,6 +7448,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; struct bpf_map *fmt_map = fmt_reg->map_ptr; + struct bpf_bprintf_data data = {}; int err, fmt_map_off, num_args; u64 fmt_addr; char *fmt; @@ -7472,7 +7473,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); if (err < 0) verbose(env, "Invalid format string\n"); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f4a494a457c52..bbf44095999f6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -377,18 +377,20 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; - u32 *bin_args; + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, - MAX_TRACE_PRINTK_VARARGS); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, + MAX_TRACE_PRINTK_VARARGS, &data); if (ret < 0) return ret; raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); @@ -426,25 +428,27 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; int ret, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (ret < 0) return ret; raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); @@ -471,21 +475,23 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) } BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (err < 0) return err; - seq_bprintf(m, fmt, bin_args); + seq_bprintf(m, fmt, data.bin_args); bpf_bprintf_cleanup(); -- GitLab From 95b7476f6f68d725c474e3348e89436b0abde62a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:29 +0100 Subject: [PATCH 0193/1418] bpf: Do cleanup in bpf_bprintf_cleanup only when needed commit f19a4050455aad847fb93f18dc1fe502eb60f989 upstream. Currently we always cleanup/decrement bpf_bprintf_nest_level variable in bpf_bprintf_cleanup if it's > 0. There's possible scenario where this could cause a problem, when bpf_bprintf_prepare does not get bin_args buffer (because num_args is 0) and following bpf_bprintf_cleanup call decrements bpf_bprintf_nest_level variable, like: in task context: bpf_bprintf_prepare(num_args != 0) increments 'bpf_bprintf_nest_level = 1' -> first irq : bpf_bprintf_prepare(num_args == 0) bpf_bprintf_cleanup decrements 'bpf_bprintf_nest_level = 0' -> second irq: bpf_bprintf_prepare(num_args != 0) bpf_bprintf_nest_level = 1 gets same buffer as task context above Adding check to bpf_bprintf_cleanup and doing the real cleanup only if we got bin_args data in the first place. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-3-jolsa@kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 2 +- kernel/bpf/helpers.c | 16 +++++++++------- kernel/trace/bpf_trace.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0e4fb16cd5c1f..bc4e6969899af 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2747,7 +2747,7 @@ struct bpf_bprintf_data { int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); -void bpf_bprintf_cleanup(void); +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 42141d59e9c67..64f41f58b37b7 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -781,12 +781,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) return 0; } -void bpf_bprintf_cleanup(void) +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (this_cpu_read(bpf_bprintf_nest_level)) { - this_cpu_dec(bpf_bprintf_nest_level); - preempt_enable(); - } + if (!data->bin_args) + return; + if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) + return; + this_cpu_dec(bpf_bprintf_nest_level); + preempt_enable(); } /* @@ -1018,7 +1020,7 @@ nocopy_fmt: err = 0; out: if (err) - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(data); return err; } @@ -1044,7 +1046,7 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, err = bstr_printf(str, str_size, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return err + 1; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bbf44095999f6..263a54d0d9eec 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -395,7 +395,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -453,7 +453,7 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -493,7 +493,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, seq_bprintf(m, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return seq_has_overflowed(m) ? -EOVERFLOW : 0; } -- GitLab From f3e975828636794a9d4cc27adb14a2f66592d414 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:30 +0100 Subject: [PATCH 0194/1418] bpf: Remove trace_printk_lock commit e2bb9e01d589f7fa82573aedd2765ff9b277816a upstream. Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer guarded with trace_printk_lock spin lock. The spin lock contention causes issues with bpf programs attached to contention_begin tracepoint [1][2]. Andrii suggested we could get rid of the contention by using trylock, but we could actually get rid of the spinlock completely by using percpu buffers the same way as for bin_args in bpf_bprintf_prepare function. Adding new return 'buf' argument to struct bpf_bprintf_data and making bpf_bprintf_prepare to return also the buffer for printk helpers. [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/ Reported-by: Hao Sun Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-4-jolsa@kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 3 +++ kernel/bpf/helpers.c | 31 +++++++++++++++++++------------ kernel/trace/bpf_trace.c | 20 ++++++-------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bc4e6969899af..1ca1902af23e9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2739,10 +2739,13 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_BPRINTF_BUF 1024 struct bpf_bprintf_data { u32 *bin_args; + char *buf; bool get_bin_args; + bool get_buf; }; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 64f41f58b37b7..6a61a98d602cd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -753,19 +753,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary * arguments representation. */ -#define MAX_BPRINTF_BUF_LEN 512 +#define MAX_BPRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; }; -static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); + +static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_fmt_tmp_buf(char **tmp_buf) +static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { - struct bpf_bprintf_buffers *bufs; int nest_level; preempt_disable(); @@ -775,15 +776,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_bprintf_bufs); - *tmp_buf = bufs->tmp_bufs[nest_level - 1]; + *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (!data->bin_args) + if (!data->bin_args && !data->buf) return; if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) return; @@ -808,7 +808,9 @@ void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data) { + bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; + struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; @@ -819,14 +821,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (data->get_bin_args) { - if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) - return -EBUSY; + if (get_buffers && try_get_buffers(&buffers)) + return -EBUSY; - tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; + if (data->get_bin_args) { + if (num_args) + tmp_buf = buffers->bin_args; + tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; data->bin_args = (u32 *)tmp_buf; } + if (data->get_buf) + data->buf = buffers->buf; + for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 263a54d0d9eec..3fdde232eaa92 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -368,8 +368,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -379,9 +377,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; struct bpf_bprintf_data data = { .get_bin_args = true, + .get_buf = true, }; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; int ret; ret = bpf_bprintf_prepare(fmt, fmt_size, args, @@ -389,11 +386,9 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); bpf_bprintf_cleanup(&data); @@ -433,9 +428,8 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, { struct bpf_bprintf_data data = { .get_bin_args = true, + .get_buf = true, }; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; int ret, num_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || @@ -447,11 +441,9 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); bpf_bprintf_cleanup(&data); -- GitLab From a160c3293a1cce15d5bb1e5886480d7d416b7353 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Wed, 17 Jan 2024 14:37:29 -0800 Subject: [PATCH 0195/1418] userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb commit 67695f18d55924b2013534ef3bdc363bc9e14605 upstream. In mfill_atomic_hugetlb(), mmap_changing isn't being checked again if we drop mmap_lock and reacquire it. When the lock is not held, mmap_changing could have been incremented. This is also inconsistent with the behavior in mfill_atomic(). Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Signed-off-by: Lokesh Gidra Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Brian Geffon Cc: David Hildenbrand Cc: Jann Horn Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 650ab6cfd5f49..992a0a16846f7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -327,6 +327,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy) { @@ -445,6 +446,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -480,6 +490,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy); #endif /* CONFIG_HUGETLB_PAGE */ @@ -601,8 +612,8 @@ retry: */ if (is_vm_hugetlb_page(dst_vma)) return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, - src_start, len, mcopy_mode, - wp_copy); + src_start, len, mmap_changing, + mcopy_mode, wp_copy); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; -- GitLab From 43ec3c888653404c492749b971e56782ca4e574b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Sep 2022 11:32:41 +0200 Subject: [PATCH 0196/1418] dmaengine: ioat: Free up __cleanup() name commit f62141ac730d6fe73a05750cb4482aabb681cfb9 upstream. In order to use __cleanup for __attribute__((__cleanup__(func))) the name must not be used for anything else. Avoid the conflict. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dave Jiang Link: https://lkml.kernel.org/r/20230612093537.467120754%40infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/dma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6cad28..0b846c605d4bd 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -584,11 +584,11 @@ desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) } /** - * __cleanup - reclaim used descriptors + * __ioat_cleanup - reclaim used descriptors * @ioat_chan: channel (ring) to clean * @phys_complete: zeroed (or not) completion address (from status) */ -static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) +static void __ioat_cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; struct ioat_ring_ent *desc; @@ -675,7 +675,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); if (is_ioat_halted(*ioat_chan->completion)) { u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); @@ -712,7 +712,7 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) ioat_quiesce(ioat_chan, 0); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); __ioat_restart_chan(ioat_chan); } @@ -786,7 +786,7 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan) /* cleanup so tail points to descriptor that caused the error */ if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); @@ -943,7 +943,7 @@ void ioat_timer_event(struct timer_list *t) /* timer restarted in ioat_cleanup_preamble * and IOAT_COMPLETION_ACK cleared */ - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); goto unlock_out; } -- GitLab From 579cfab21b59fbf4bba2a564c5810ad72e7f868a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 09:48:59 +0200 Subject: [PATCH 0197/1418] apparmor: Free up __cleanup() name commit 9a1f37ebcfe061721564042254719dc8fd5c9fa0 upstream. In order to use __cleanup for __attribute__((__cleanup__(func))) the name must not be used for anything else. Avoid the conflict. Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Johansen Link: https://lkml.kernel.org/r/20230612093537.536441207%40infradead.org Signed-off-by: Greg Kroah-Hartman --- security/apparmor/include/lib.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f42359f58eb58..d468c8b90298d 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -226,7 +226,7 @@ void aa_policy_destroy(struct aa_policy *policy); */ #define fn_label_build(L, P, GFP, FN) \ ({ \ - __label__ __cleanup, __done; \ + __label__ __do_cleanup, __done; \ struct aa_label *__new_; \ \ if ((L)->size > 1) { \ @@ -244,7 +244,7 @@ void aa_policy_destroy(struct aa_policy *policy); __new_ = (FN); \ AA_BUG(!__new_); \ if (IS_ERR(__new_)) \ - goto __cleanup; \ + goto __do_cleanup; \ __lvec[__j++] = __new_; \ } \ for (__j = __count = 0; __j < (L)->size; __j++) \ @@ -266,7 +266,7 @@ void aa_policy_destroy(struct aa_policy *policy); vec_cleanup(profile, __pvec, __count); \ } else \ __new_ = NULL; \ -__cleanup: \ +__do_cleanup: \ vec_cleanup(label, __lvec, (L)->size); \ } else { \ (P) = labels_profile(L); \ -- GitLab From 3c6cc62ce1265aa5623e2e1b29c0fe258bf6e232 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 May 2023 12:23:48 +0200 Subject: [PATCH 0198/1418] locking: Introduce __cleanup() based infrastructure commit 54da6a0924311c7cf5015533991e44fb8eb12773 upstream. Use __attribute__((__cleanup__(func))) to build: - simple auto-release pointers using __free() - 'classes' with constructor and destructor semantics for scope-based resource management. - lock guards based on the above classes. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230612093537.614161713%40infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/cleanup.h | 171 ++++++++++++++++++++++++++++ include/linux/compiler-clang.h | 9 ++ include/linux/compiler_attributes.h | 6 + include/linux/device.h | 7 ++ include/linux/file.h | 6 + include/linux/irqflags.h | 7 ++ include/linux/mutex.h | 4 + include/linux/percpu.h | 4 + include/linux/preempt.h | 5 + include/linux/rcupdate.h | 3 + include/linux/rwsem.h | 8 ++ include/linux/sched/task.h | 2 + include/linux/slab.h | 3 + include/linux/spinlock.h | 31 +++++ include/linux/srcu.h | 5 + scripts/checkpatch.pl | 2 +- 16 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 include/linux/cleanup.h diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h new file mode 100644 index 0000000000000..53f1a7a932b08 --- /dev/null +++ b/include/linux/cleanup.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GUARDS_H +#define __LINUX_GUARDS_H + +#include + +/* + * DEFINE_FREE(name, type, free): + * simple helper macro that defines the required wrapper for a __free() + * based cleanup function. @free is an expression using '_T' to access + * the variable. + * + * __free(name): + * variable attribute to add a scoped based cleanup to the variable. + * + * no_free_ptr(var): + * like a non-atomic xchg(var, NULL), such that the cleanup function will + * be inhibited -- provided it sanely deals with a NULL value. + * + * return_ptr(p): + * returns p while inhibiting the __free(). + * + * Ex. + * + * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + * + * struct obj *p __free(kfree) = kmalloc(...); + * if (!p) + * return NULL; + * + * if (!init_obj(p)) + * return NULL; + * + * return_ptr(p); + */ + +#define DEFINE_FREE(_name, _type, _free) \ + static inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; } + +#define __free(_name) __cleanup(__free_##_name) + +#define no_free_ptr(p) \ + ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + +#define return_ptr(p) return no_free_ptr(p) + + +/* + * DEFINE_CLASS(name, type, exit, init, init_args...): + * helper to define the destructor and constructor for a type. + * @exit is an expression using '_T' -- similar to FREE above. + * @init is an expression in @init_args resulting in @type + * + * EXTEND_CLASS(name, ext, init, init_args...): + * extends class @name to @name@ext with the new constructor + * + * CLASS(name, var)(args...): + * declare the variable @var as an instance of the named class + * + * Ex. + * + * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) + * + * CLASS(fdget, f)(fd); + * if (!f.file) + * return -EBADF; + * + * // use 'f' without concern + */ + +#define DEFINE_CLASS(_name, _type, _exit, _init, _init_args...) \ +typedef _type class_##_name##_t; \ +static inline void class_##_name##_destructor(_type *p) \ +{ _type _T = *p; _exit; } \ +static inline _type class_##_name##_constructor(_init_args) \ +{ _type t = _init; return t; } + +#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ +typedef class_##_name##_t class_##_name##ext##_t; \ +static inline void class_##_name##ext##_destructor(class_##_name##_t *p)\ +{ class_##_name##_destructor(p); } \ +static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ +{ class_##_name##_t t = _init; return t; } + +#define CLASS(_name, var) \ + class_##_name##_t var __cleanup(class_##_name##_destructor) = \ + class_##_name##_constructor + + +/* + * DEFINE_GUARD(name, type, lock, unlock): + * trivial wrapper around DEFINE_CLASS() above specifically + * for locks. + * + * guard(name): + * an anonymous instance of the (guard) class + * + * scoped_guard (name, args...) { }: + * similar to CLASS(name, scope)(args), except the variable (with the + * explicit name 'scope') is declard in a for-loop such that its scope is + * bound to the next (compound) statement. + * + */ + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + +#define guard(_name) \ + CLASS(_name, __UNIQUE_ID(guard)) + +#define scoped_guard(_name, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) + +/* + * Additional helper macros for generating lock guards with types, either for + * locks that don't have a native type (eg. RCU, preempt) or those that need a + * 'fat' pointer (eg. spin_lock_irqsave). + * + * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * + * will result in the following type: + * + * typedef struct { + * type *lock; // 'type := void' for the _0 variant + * __VA_ARGS__; + * } class_##name##_t; + * + * As above, both _lock and _unlock are statements, except this time '_T' will + * be a pointer to the above struct. + */ + +#define __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, ...) \ +typedef struct { \ + _type *lock; \ + __VA_ARGS__; \ +} class_##_name##_t; \ + \ +static inline void class_##_name##_destructor(class_##_name##_t *_T) \ +{ \ + if (_T->lock) { _unlock; } \ +} + + +#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(_type *l) \ +{ \ + class_##_name##_t _t = { .lock = l }, *_T = &_t; \ + _lock; \ + return _t; \ +} + +#define __DEFINE_LOCK_GUARD_0(_name, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(void) \ +{ \ + class_##_name##_t _t = { .lock = (void*)1 }, \ + *_T __maybe_unused = &_t; \ + _lock; \ + return _t; \ +} + +#define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_1(_name, _type, _lock) + +#define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_0(_name, _lock) + +#endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 6cfd6902bd5b9..9b673fefcef8a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -5,6 +5,15 @@ /* Compiler specific definitions for Clang compiler */ +/* + * Clang prior to 17 is being silly and considers many __cleanup() variables + * as unused (because they are, their sole purpose is to go out of scope). + * + * https://reviews.llvm.org/D152180 + */ +#undef __cleanup +#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) + /* same as gcc, this was present in clang-2.6 so we can assume it works * with any version that can compile the kernel */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a0..ae4c9579ca5f0 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -75,6 +75,12 @@ # define __assume_aligned(a, ...) #endif +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-cleanup-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#cleanup + */ +#define __cleanup(func) __attribute__((__cleanup__(func))) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute diff --git a/include/linux/device.h b/include/linux/device.h index 7cf24330d6814..5520bb546a4ac 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -30,6 +30,7 @@ #include #include #include +#include #include struct device; @@ -898,6 +899,9 @@ void device_unregister(struct device *dev); void device_initialize(struct device *dev); int __must_check device_add(struct device *dev); void device_del(struct device *dev); + +DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) + int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); int device_for_each_child_reverse(struct device *dev, void *data, @@ -1071,6 +1075,9 @@ extern int (*platform_notify_remove)(struct device *dev); */ struct device *get_device(struct device *dev); void put_device(struct device *dev); + +DEFINE_FREE(put_device, struct device *, if (_T) put_device(_T)) + bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS diff --git a/include/linux/file.h b/include/linux/file.h index 39704eae83e27..6e9099d293436 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -10,6 +10,7 @@ #include #include #include +#include struct file; @@ -80,6 +81,8 @@ static inline void fdput_pos(struct fd f) fdput(f); } +DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); @@ -88,6 +91,9 @@ extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); +DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), + get_unused_fd_flags(flags), unsigned flags) + extern void fd_install(unsigned int fd, struct file *file); extern int __receive_fd(struct file *file, int __user *ufd, diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5ec0fa71399e4..2b665c32f5fe6 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -13,6 +13,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include +#include #include #include @@ -267,4 +268,10 @@ extern void warn_bogus_irq_restore(void); #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +DEFINE_LOCK_GUARD_0(irq, local_irq_disable(), local_irq_enable()) +DEFINE_LOCK_GUARD_0(irqsave, + local_irq_save(_T->flags), + local_irq_restore(_T->flags), + unsigned long flags) + #endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8f226d460f51c..a33aa9eb9fc3b 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -219,4 +220,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) +DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad1351cc..ba00a49369cae 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); + +DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) + extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #define alloc_percpu_gfp(type, gfp) \ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8cfcc5d454512..9aa6358a1a16b 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -8,6 +8,7 @@ */ #include +#include #include /* @@ -474,4 +475,8 @@ static __always_inline void preempt_enable_nested(void) preempt_enable(); } +DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) +DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 46bd9a331fd5d..d2507168b9c7b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1077,4 +1078,6 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) extern int rcu_expedited; extern int rcu_normal; +DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index efa5c324369a2..1dd530ce8b45b 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ @@ -201,6 +202,13 @@ extern void up_read(struct rw_semaphore *sem); */ extern void up_write(struct rw_semaphore *sem); +DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) + +DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) +DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) + + /* * downgrade write lock to read lock */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 7291fb6399d2a..aaa25ed1a8fe0 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -145,6 +145,8 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) + static inline void put_task_struct_many(struct task_struct *t, int nr) { if (refcount_sub_and_test(nr, &t->usage)) diff --git a/include/linux/slab.h b/include/linux/slab.h index 45efc6c553b82..cb4b5deca9a9c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -17,6 +17,7 @@ #include #include #include +#include /* @@ -197,6 +198,8 @@ void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); +DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + /** * ksize - Report actual allocation size of associated object * diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1341f7d62da44..83377540c369a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -493,5 +494,35 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, + raw_spin_lock(_T->lock), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, + raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, + raw_spin_lock_irq(_T->lock), + raw_spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, + raw_spin_lock_irqsave(_T->lock, _T->flags), + raw_spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, + spin_lock(_T->lock), + spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, + spin_lock_irq(_T->lock), + spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, + spin_lock_irqsave(_T->lock, _T->flags), + spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a0..f9e1fa7ff86fc 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -212,4 +212,9 @@ static inline void smp_mb__after_srcu_read_unlock(void) /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } +DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, + _T->idx = srcu_read_lock(_T->lock), + srcu_read_unlock(_T->lock, _T->idx), + int idx) + #endif diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1e5e66ae5a522..ecf4250b0d2d2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4971,7 +4971,7 @@ sub process { if|for|while|switch|return|case| volatile|__volatile__| __attribute__|format|__extension__| - asm|__asm__)$/x) + asm|__asm__|scoped_guard)$/x) { # cpp #define statements have non-optional spaces, ie # if there is a space between the name and the open -- GitLab From d3a5f798bc866dae270c3f2a863dc3d75629ffd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 11:28:30 +0200 Subject: [PATCH 0199/1418] kbuild: Drop -Wdeclaration-after-statement commit b5ec6fd286dfa466f64cb0e56ed768092d0342ae upstream. With the advent on scope-based resource management it comes really tedious to abide by the contraints of -Wdeclaration-after-statement. It will still be recommeneded to place declarations at the start of a scope where possible, but it will no longer be enforced. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAHk-%3Dwi-RyoUhbChiVaJZoZXheAwnJ7OO%3DGxe85BkPAd93TwDA%40mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- Makefile | 6 +----- arch/arm64/kernel/vdso32/Makefile | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e93554269e474..1c23f38fc5d84 100644 --- a/Makefile +++ b/Makefile @@ -459,8 +459,7 @@ HOSTRUSTC = rustc HOSTPKG_CONFIG = pkg-config KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ - -O2 -fomit-frame-pointer -std=gnu11 \ - -Wdeclaration-after-statement + -O2 -fomit-frame-pointer -std=gnu11 KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) KBUILD_USERLDFLAGS := $(USERLDFLAGS) @@ -1018,9 +1017,6 @@ endif # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -# warn about C99 declaration after statement -KBUILD_CFLAGS += -Wdeclaration-after-statement - # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 36c8f66cad251..d513533cc922f 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -68,11 +68,9 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -Wdeclaration-after-statement \ -std=gnu11 VDSO_CFLAGS += -O2 # Some useful compiler-dependent flags from top-level Makefile -VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) VDSO_CFLAGS += -fno-strict-overflow VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) -- GitLab From 24ec7504a08a67247fbe798d1de995208a8c128a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Feb 2024 15:25:12 +0000 Subject: [PATCH 0200/1418] sched/membarrier: reduce the ability to hammer on sys_membarrier commit 944d5fe50f3f03daacfea16300e656a1691c4a23 upstream. On some systems, sys_membarrier can be very expensive, causing overall slowdowns for everything. So put a lock on the path in order to serialize the accesses to prevent the ability for this to be called at too high of a frequency and saturate the machine. Reviewed-and-tested-by: Mathieu Desnoyers Acked-by: Borislav Petkov Fixes: 22e4ebb97582 ("membarrier: Provide expedited private command") Fixes: c5f58bd58f43 ("membarrier: Provide GLOBAL_EXPEDITED command") Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sched/membarrier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 0c5be7ebb1dca..08b16d20c85bb 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -161,6 +161,9 @@ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) +static DEFINE_MUTEX(membarrier_ipi_mutex); +#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex) + static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ @@ -258,6 +261,7 @@ static int membarrier_global_expedited(void) if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { @@ -346,6 +350,7 @@ static int membarrier_private_expedited(int flags, int cpu_id) if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); if (cpu_id >= 0) { @@ -459,6 +464,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) * between threads which are users of @mm has its membarrier state * updated. */ + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { -- GitLab From 058d1c56167ec78daf420d6c43ddd7e2ba6b9bdf Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:02 -0800 Subject: [PATCH 0201/1418] of: property: Add in-ports/out-ports support to of_graph_get_port_parent() commit 8f1e0d791b5281f3a38620bc7c57763dc551be15 upstream. Similar to the existing "ports" node name, coresight device tree bindings have added "in-ports" and "out-ports" as standard node names for a collection of ports. Add support for these name to of_graph_get_port_parent() so that remote-endpoint parsing can find the correct parent node for these coresight ports too. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-4-saravanak@google.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 550efd1a58fc6..33d5f16c81204 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -762,7 +762,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node) /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); - if (depth == 2 && !of_node_name_eq(node, "ports")) + if (depth == 2 && !of_node_name_eq(node, "ports") && + !of_node_name_eq(node, "in-ports") && + !of_node_name_eq(node, "out-ports")) break; } return node; -- GitLab From 6589f0f72f8edd1fa11adce4eedbd3615f2e78ab Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 4 Feb 2024 01:16:45 +0900 Subject: [PATCH 0202/1418] nilfs2: fix potential bug in end_buffer_async_write commit 5bc09b397cbf1221f8a8aacb1152650c9195b02b upstream. According to a syzbot report, end_buffer_async_write(), which handles the completion of block device writes, may detect abnormal condition of the buffer async_write flag and cause a BUG_ON failure when using nilfs2. Nilfs2 itself does not use end_buffer_async_write(). But, the async_write flag is now used as a marker by commit 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks") as a means of resolving double list insertion of dirty blocks in nilfs_lookup_dirty_data_buffers() and nilfs_lookup_node_buffers() and the resulting crash. This modification is safe as long as it is used for file data and b-tree node blocks where the page caches are independent. However, it was irrelevant and redundant to also introduce async_write for segment summary and super root blocks that share buffers with the backing device. This led to the possibility that the BUG_ON check in end_buffer_async_write would fail as described above, if independent writebacks of the backing device occurred in parallel. The use of async_write for segment summary buffers has already been removed in a previous change. Fix this issue by removing the manipulation of the async_write flag for the remaining super root block buffer. Link: https://lkml.kernel.org/r/20240203161645.4992-1-konishi.ryusuke@gmail.com Fixes: 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+5c04210f7c7f897c1e7f@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/00000000000019a97c05fd42f8c8@google.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a4a147a983e0a..0a84613960dbf 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1702,7 +1702,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1713,6 +1712,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } break; } + set_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_begin_page_io(fs_page); fs_page = bh->b_page; @@ -1798,7 +1798,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { clear_buffer_uptodate(bh); if (bh->b_page != bd_page) { @@ -1807,6 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) } break; } + clear_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); fs_page = bh->b_page; @@ -1894,8 +1894,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Redirected)); - set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh == segbuf->sb_super_root) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1903,6 +1904,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) update_sr = true; break; } + set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, 0); fs_page = bh->b_page; -- GitLab From 13f79a002602e562ce425b403fa9797dcf1cfeab Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Jan 2023 01:41:14 +0900 Subject: [PATCH 0203/1418] nilfs2: replace WARN_ONs for invalid DAT metadata block requests commit 5124a0a549857c4b87173280e192eea24dea72ad upstream. If DAT metadata file block access fails due to corruption of the DAT file or abnormal virtual block numbers held by b-trees or inodes, a kernel warning is generated. This replaces the WARN_ONs by error output, so that a kernel, booted with panic_on_warn, does not panic. This patch also replaces the detected return code -ENOENT with another internal code -EINVAL to notify the bmap layer of metadata corruption. When the bmap layer sees -EINVAL, it handles the abnormal situation with nilfs_bmap_convert_error() and finally returns code -EIO as it should. Link: https://lkml.kernel.org/r/0000000000005cc3d205ea23ddcf@google.com Link: https://lkml.kernel.org/r/20230126164114.6911-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/dat.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9930fa901039f..1e7f653c1df7e 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat, int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, @@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, -- GitLab From c5d83ac2bf6ca668a39ffb1a576899a66153ba19 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 15:57:56 +0100 Subject: [PATCH 0204/1418] dm: limit the number of targets and parameter size area commit bd504bcfec41a503b32054da5472904b404341a4 upstream. The kvmalloc function fails with a warning if the size is larger than INT_MAX. The warning was triggered by a syscall testing robot. In order to avoid the warning, this commit limits the number of targets to 1048576 and the size of the parameter area to 1073741824. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-core.h | 2 ++ drivers/md/dm-ioctl.c | 3 ++- drivers/md/dm-table.c | 9 +++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 71dcd8fd4050a..6314210d36971 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -21,6 +21,8 @@ #include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_io; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 206e6ce554dc7..4376754816abe 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1877,7 +1877,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) { + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) { DMERR("Invalid data size in the ioctl structure: %u", param_kernel->data_size); return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index dac6a5f25f2be..e0367a672eabf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -128,7 +128,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, fmode_t mode, unsigned int num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -143,7 +148,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { -- GitLab From d028cc6d235fb0fe919bf20d785c4c7dd4eab7a9 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 14 Feb 2024 17:55:18 +0000 Subject: [PATCH 0205/1418] arm64: Subscribe Microsoft Azure Cobalt 100 to ARM Neoverse N2 errata commit fb091ff394792c018527b3211bbdfae93ea4ac02 upstream. Add the MIDR value of Microsoft Azure Cobalt 100, which is a Microsoft implemented CPU based on r0p0 of the ARM Neoverse N2 CPU, and therefore suffers from all the same errata. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Easwar Hariharan Reviewed-by: Anshuman Khandual Acked-by: Mark Rutland Acked-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240214175522.2457857-1-eahariha@linux.microsoft.com Signed-off-by: Will Deacon Signed-off-by: Easwar Hariharan Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/silicon-errata.rst | 7 +++++++ arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kernel/cpu_errata.c | 3 +++ 3 files changed, 14 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d9fce65b2f047..27135b9c07acb 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -221,3 +221,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7dce9c0aa7836..af3a678a76b3a 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -128,6 +129,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -179,6 +182,7 @@ #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61f22e9c92b4c..74584597bfb82 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -390,6 +390,7 @@ static const struct midr_range erratum_1463225[] = { static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2139208 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -403,6 +404,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { static const struct midr_range tsb_flush_fail_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2067961 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2054223 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -415,6 +417,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = { static struct midr_range trbe_write_out_of_range_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2253138 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), -- GitLab From 9020513afafe1b28ffc5d0dad6accb4ebcd0030c Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 29 Dec 2022 15:44:43 +0400 Subject: [PATCH 0206/1418] fs/ntfs3: Add null pointer checks commit fc4992458e0aa2d2e82a25c922e6ac36c2d91083 upstream. Added null pointer checks in function ntfs_security_init. Also added le32_to_cpu in functions ntfs_security_init and indx_read. Signed-off-by: Konstantin Komarov Cc: "Doebel, Bjoern" Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/fsntfs.c | 16 ++++++++++------ fs/ntfs3/index.c | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 873b1434a9989..4b72bc7f12ca3 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1842,10 +1842,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sdh->type != ATTR_ZERO || + if(!(root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sdh->type != ATTR_ZERO || root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || - offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sdh->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } @@ -1861,10 +1863,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sii->type != ATTR_ZERO || + if(!(root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sii->type != ATTR_ZERO || root_sii->rule != NTFS_COLLATION_TYPE_UINT || - offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sii->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index b89a33f5761ef..7371f7855e4c4 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1097,7 +1097,8 @@ ok: } /* check for index header length */ - if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) { + if (offsetof(struct INDEX_BUFFER, ihdr) + le32_to_cpu(ib->ihdr.used) > + bytes) { err = -EINVAL; goto out; } -- GitLab From 6fd24675188d354b1cad47462969afa2ab09d819 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Jan 2024 16:04:18 +0100 Subject: [PATCH 0207/1418] mlxsw: spectrum_acl_tcam: Fix stack corruption commit 483ae90d8f976f8339cf81066312e1329f2d3706 upstream. When tc filters are first added to a net device, the corresponding local port gets bound to an ACL group in the device. The group contains a list of ACLs. In turn, each ACL points to a different TCAM region where the filters are stored. During forwarding, the ACLs are sequentially evaluated until a match is found. One reason to place filters in different regions is when they are added with decreasing priorities and in an alternating order so that two consecutive filters can never fit in the same region because of their key usage. In Spectrum-2 and newer ASICs the firmware started to report that the maximum number of ACLs in a group is more than 16, but the layout of the register that configures ACL groups (PAGT) was not updated to account for that. It is therefore possible to hit stack corruption [1] in the rare case where more than 16 ACLs in a group are required. Fix by limiting the maximum ACL group size to the minimum between what the firmware reports and the maximum ACLs that fit in the PAGT register. Add a test case to make sure the machine does not crash when this condition is hit. [1] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: mlxsw_sp_acl_tcam_group_update+0x116/0x120 [...] dump_stack_lvl+0x36/0x50 panic+0x305/0x330 __stack_chk_fail+0x15/0x20 mlxsw_sp_acl_tcam_group_update+0x116/0x120 mlxsw_sp_acl_tcam_group_region_attach+0x69/0x110 mlxsw_sp_acl_tcam_vchunk_get+0x492/0xa20 mlxsw_sp_acl_tcam_ventry_add+0x25/0xe0 mlxsw_sp_acl_rule_add+0x47/0x240 mlxsw_sp_flower_replace+0x1a9/0x1d0 tc_setup_cb_add+0xdc/0x1c0 fl_hw_replace_filter+0x146/0x1f0 fl_change+0xc17/0x1360 tc_new_tfilter+0x472/0xb90 rtnetlink_rcv_msg+0x313/0x3b0 netlink_rcv_skb+0x58/0x100 netlink_unicast+0x244/0x390 netlink_sendmsg+0x1e4/0x440 ____sys_sendmsg+0x164/0x260 ___sys_sendmsg+0x9a/0xe0 __sys_sendmsg+0x7a/0xc0 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Reported-by: Orel Hagag Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Signed-off-by: Petr Machata Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/2d91c89afba59c22587b444994ae419dbea8d876.1705502064.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 2 + .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 56 ++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 3b9ba8fa247ab..dc2e204bcd727 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -65,6 +65,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); + tcam->max_group_size = min_t(unsigned int, tcam->max_group_size, + MLXSW_REG_PAGT_ACL_MAX_NUM); err = ops->init(mlxsw_sp, tcam->priv, tcam); if (err) diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 7bf56ea161e35..616d3581419ca 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test" + max_erp_entries_test max_group_size_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -1033,6 +1033,60 @@ max_erp_entries_test() "max chain $chain_failed, mask $mask_failed" } +max_group_size_test() +{ + # The number of ACLs in an ACL group is limited. Once the maximum + # number of ACLs has been reached, filters cannot be added. This test + # verifies that when this limit is reached, insertion fails without + # crashing. + + RET=0 + + local num_acls=32 + local max_size + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_acls; i++)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter add dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter add dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + + ret=$? + [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break + done + + # We expect to exceed the maximum number of ACLs in a group, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of ACLs in a group" + + for ((; i >= 1; i--)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter del dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter del dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + done + + log_test "max ACL group size test ($tcflags). max size $max_size" +} + setup_prepare() { h1=${NETIFS[p1]} -- GitLab From 81e1dc2f70014b9523dd02ca763788e4f81e5bac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Feb 2024 09:12:53 +0100 Subject: [PATCH 0208/1418] Linux 6.1.79 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240220204841.073267068@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Jon Hunter Tested-by: Salvatore Bonaccorso Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20240221130223.073542172@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: kernelci.org bot Tested-by: Yann Sionneau Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Mateusz Jończyk Tested-by: Kelsey Steele Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1c23f38fc5d84..d6bc9f597e8b8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 02149c7cd115d3c82d20f758be4c9013d1128928 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:11 -0500 Subject: [PATCH 0209/1418] net/sched: Retire CBQ qdisc commit 051d442098421c28c7951625652f61b1e15c4bd5 upstream. While this amazing qdisc has served us well over the years it has not been getting any tender love and care and has bitrotted over time. It has become mostly a shooting target for syzkaller lately. For this reason, we are retiring it. Goodbye CBQ - we loved you. Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/Kconfig | 17 - net/sched/Makefile | 1 - net/sched/sch_cbq.c | 1727 ----------------- .../tc-testing/tc-tests/qdiscs/cbq.json | 184 -- 4 files changed, 1929 deletions(-) delete mode 100644 net/sched/sch_cbq.c delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 24cf0bf7c80e5..bbd84d8bc64ae 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -45,23 +45,6 @@ if NET_SCHED comment "Queueing/Scheduling" -config NET_SCH_CBQ - tristate "Class Based Queueing (CBQ)" - help - Say Y here if you want to use the Class-Based Queueing (CBQ) packet - scheduling algorithm. This algorithm classifies the waiting packets - into a tree-like hierarchy of classes; the leaves of this tree are - in turn scheduled by separate algorithms. - - See the top of for more details. - - CBQ is a commonly used scheduler, so if you're unsure, you should - say Y here. Then say Y to all the queueing algorithms below that you - want to use as leaf disciplines. - - To compile this code as a module, choose M here: the - module will be called sch_cbq. - config NET_SCH_HTB tristate "Hierarchical Token Bucket (HTB)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 8a33a35fc50d5..0108d1bffa512 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o obj-$(CONFIG_NET_ACT_GATE) += act_gate.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o -obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c deleted file mode 100644 index 36db5f6782f2c..0000000000000 --- a/net/sched/sch_cbq.c +++ /dev/null @@ -1,1727 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/sch_cbq.c Class-Based Queueing discipline. - * - * Authors: Alexey Kuznetsov, - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Class-Based Queueing (CBQ) algorithm. - ======================================= - - Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource - Management Models for Packet Networks", - IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 - - [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 - - [3] Sally Floyd, "Notes on Class-Based Queueing: Setting - Parameters", 1996 - - [4] Sally Floyd and Michael Speer, "Experimental Results - for Class-Based Queueing", 1998, not published. - - ----------------------------------------------------------------------- - - Algorithm skeleton was taken from NS simulator cbq.cc. - If someone wants to check this code against the LBL version, - he should take into account that ONLY the skeleton was borrowed, - the implementation is different. Particularly: - - --- The WRR algorithm is different. Our version looks more - reasonable (I hope) and works when quanta are allowed to be - less than MTU, which is always the case when real time classes - have small rates. Note, that the statement of [3] is - incomplete, delay may actually be estimated even if class - per-round allotment is less than MTU. Namely, if per-round - allotment is W*r_i, and r_1+...+r_k = r < 1 - - delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B - - In the worst case we have IntServ estimate with D = W*r+k*MTU - and C = MTU*r. The proof (if correct at all) is trivial. - - - --- It seems that cbq-2.0 is not very accurate. At least, I cannot - interpret some places, which look like wrong translations - from NS. Anyone is advised to find these differences - and explain to me, why I am wrong 8). - - --- Linux has no EOI event, so that we cannot estimate true class - idle time. Workaround is to consider the next dequeue event - as sign that previous packet is finished. This is wrong because of - internal device queueing, but on a permanently loaded link it is true. - Moreover, combined with clock integrator, this scheme looks - very close to an ideal solution. */ - -struct cbq_sched_data; - - -struct cbq_class { - struct Qdisc_class_common common; - struct cbq_class *next_alive; /* next class with backlog in this priority band */ - -/* Parameters */ - unsigned char priority; /* class priority */ - unsigned char priority2; /* priority to be used after overlimit */ - unsigned char ewma_log; /* time constant for idle time calculation */ - - u32 defmap; - - /* Link-sharing scheduler parameters */ - long maxidle; /* Class parameters: see below. */ - long offtime; - long minidle; - u32 avpkt; - struct qdisc_rate_table *R_tab; - - /* General scheduler (WRR) parameters */ - long allot; - long quantum; /* Allotment per WRR round */ - long weight; /* Relative allotment: see below */ - - struct Qdisc *qdisc; /* Ptr to CBQ discipline */ - struct cbq_class *split; /* Ptr to split node */ - struct cbq_class *share; /* Ptr to LS parent in the class tree */ - struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ - struct cbq_class *borrow; /* NULL if class is bandwidth limited; - parent otherwise */ - struct cbq_class *sibling; /* Sibling chain */ - struct cbq_class *children; /* Pointer to children chain */ - - struct Qdisc *q; /* Elementary queueing discipline */ - - -/* Variables */ - unsigned char cpriority; /* Effective priority */ - unsigned char delayed; - unsigned char level; /* level of the class in hierarchy: - 0 for leaf classes, and maximal - level of children + 1 for nodes. - */ - - psched_time_t last; /* Last end of service */ - psched_time_t undertime; - long avgidle; - long deficit; /* Saved deficit for WRR */ - psched_time_t penalized; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tc_cbq_xstats xstats; - - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - - int filters; - - struct cbq_class *defaults[TC_PRIO_MAX + 1]; -}; - -struct cbq_sched_data { - struct Qdisc_class_hash clhash; /* Hash table of all classes */ - int nclasses[TC_CBQ_MAXPRIO + 1]; - unsigned int quanta[TC_CBQ_MAXPRIO + 1]; - - struct cbq_class link; - - unsigned int activemask; - struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes - with backlog */ - -#ifdef CONFIG_NET_CLS_ACT - struct cbq_class *rx_class; -#endif - struct cbq_class *tx_class; - struct cbq_class *tx_borrowed; - int tx_len; - psched_time_t now; /* Cached timestamp */ - unsigned int pmask; - - struct qdisc_watchdog watchdog; /* Watchdog timer, - started when CBQ has - backlog, but cannot - transmit just now */ - psched_tdiff_t wd_expires; - int toplevel; - u32 hgenerator; -}; - - -#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) - -static inline struct cbq_class * -cbq_class_lookup(struct cbq_sched_data *q, u32 classid) -{ - struct Qdisc_class_common *clc; - - clc = qdisc_class_find(&q->clhash, classid); - if (clc == NULL) - return NULL; - return container_of(clc, struct cbq_class, common); -} - -#ifdef CONFIG_NET_CLS_ACT - -static struct cbq_class * -cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) -{ - struct cbq_class *cl; - - for (cl = this->tparent; cl; cl = cl->tparent) { - struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; - - if (new != NULL && new != this) - return new; - } - return NULL; -} - -#endif - -/* Classify packet. The procedure is pretty complicated, but - * it allows us to combine link sharing and priority scheduling - * transparently. - * - * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - * so that it resolves to split nodes. Then packets are classified - * by logical priority, or a more specific classifier may be attached - * to the split node. - */ - -static struct cbq_class * -cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *head = &q->link; - struct cbq_class **defmap; - struct cbq_class *cl = NULL; - u32 prio = skb->priority; - struct tcf_proto *fl; - struct tcf_result res; - - /* - * Step 1. If skb->priority points to one of our classes, use it. - */ - if (TC_H_MAJ(prio ^ sch->handle) == 0 && - (cl = cbq_class_lookup(q, prio)) != NULL) - return cl; - - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - for (;;) { - int result = 0; - defmap = head->defaults; - - fl = rcu_dereference_bh(head->filter_list); - /* - * Step 2+n. Apply classifier. - */ - result = tcf_classify(skb, NULL, fl, &res, true); - if (!fl || result < 0) - goto fallback; - if (result == TC_ACT_SHOT) - return NULL; - - cl = (void *)res.class; - if (!cl) { - if (TC_H_MAJ(res.classid)) - cl = cbq_class_lookup(q, res.classid); - else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) - cl = defmap[TC_PRIO_BESTEFFORT]; - - if (cl == NULL) - goto fallback; - } - if (cl->level >= head->level) - goto fallback; -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - fallthrough; - case TC_ACT_RECLASSIFY: - return cbq_reclassify(skb, cl); - } -#endif - if (cl->level == 0) - return cl; - - /* - * Step 3+n. If classifier selected a link sharing class, - * apply agency specific classifier. - * Repeat this procedure until we hit a leaf node. - */ - head = cl; - } - -fallback: - cl = head; - - /* - * Step 4. No success... - */ - if (TC_H_MAJ(prio) == 0 && - !(cl = head->defaults[prio & TC_PRIO_MAX]) && - !(cl = head->defaults[TC_PRIO_BESTEFFORT])) - return head; - - return cl; -} - -/* - * A packet has just been enqueued on the empty class. - * cbq_activate_class adds it to the tail of active class list - * of its priority band. - */ - -static inline void cbq_activate_class(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - int prio = cl->cpriority; - struct cbq_class *cl_tail; - - cl_tail = q->active[prio]; - q->active[prio] = cl; - - if (cl_tail != NULL) { - cl->next_alive = cl_tail->next_alive; - cl_tail->next_alive = cl; - } else { - cl->next_alive = cl; - q->activemask |= (1<qdisc); - int prio = this->cpriority; - struct cbq_class *cl; - struct cbq_class *cl_prev = q->active[prio]; - - do { - cl = cl_prev->next_alive; - if (cl == this) { - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - if (cl == q->active[prio]) { - q->active[prio] = cl_prev; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - q->activemask &= ~(1<active[prio]); -} - -static void -cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) -{ - int toplevel = q->toplevel; - - if (toplevel > cl->level) { - psched_time_t now = psched_get_time(); - - do { - if (cl->undertime < now) { - q->toplevel = cl->level; - return; - } - } while ((cl = cl->borrow) != NULL && toplevel > cl->level); - } -} - -static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - int ret; - struct cbq_class *cl = cbq_classify(skb, sch, &ret); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; -#endif - if (cl == NULL) { - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; - } - - ret = qdisc_enqueue(skb, cl->q, to_free); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return ret; - } - - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; - } - return ret; -} - -/* Overlimit action: penalize leaf class by adding offtime */ -static void cbq_overlimit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = cl->undertime - q->now; - - if (!cl->delayed) { - delay += cl->offtime; - - /* - * Class goes to sleep, so that it will have no - * chance to work avgidle. Let's forgive it 8) - * - * BTW cbq-2.0 has a crap in this - * place, apparently they forgot to shift it by cl->ewma_log. - */ - if (cl->avgidle < 0) - delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); - if (cl->avgidle < cl->minidle) - cl->avgidle = cl->minidle; - if (delay <= 0) - delay = 1; - cl->undertime = q->now + delay; - - cl->xstats.overactions++; - cl->delayed = 1; - } - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; - - /* Dirty work! We must schedule wakeups based on - * real available rate, rather than leaf rate, - * which may be tiny (even zero). - */ - if (q->toplevel == TC_CBQ_MAXLEVEL) { - struct cbq_class *b; - psched_tdiff_t base_delay = q->wd_expires; - - for (b = cl->borrow; b; b = b->borrow) { - delay = b->undertime - q->now; - if (delay < base_delay) { - if (delay <= 0) - delay = 1; - base_delay = delay; - } - } - - q->wd_expires = base_delay; - } -} - -/* - * It is mission critical procedure. - * - * We "regenerate" toplevel cutoff, if transmitting class - * has backlog and it is not regulated. It is not part of - * original CBQ description, but looks more reasonable. - * Probably, it is wrong. This question needs further investigation. - */ - -static inline void -cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, - struct cbq_class *borrowed) -{ - if (cl && q->toplevel >= borrowed->level) { - if (cl->q->q.qlen > 1) { - do { - if (borrowed->undertime == PSCHED_PASTPERFECT) { - q->toplevel = borrowed->level; - return; - } - } while ((borrowed = borrowed->borrow) != NULL); - } -#if 0 - /* It is not necessary now. Uncommenting it - will save CPU cycles, but decrease fairness. - */ - q->toplevel = TC_CBQ_MAXLEVEL; -#endif - } -} - -static void -cbq_update(struct cbq_sched_data *q) -{ - struct cbq_class *this = q->tx_class; - struct cbq_class *cl = this; - int len = q->tx_len; - psched_time_t now; - - q->tx_class = NULL; - /* Time integrator. We calculate EOS time - * by adding expected packet transmission time. - */ - now = q->now + L2T(&q->link, len); - - for ( ; cl; cl = cl->share) { - long avgidle = cl->avgidle; - long idle; - - _bstats_update(&cl->bstats, len, 1); - - /* - * (now - last) is total time between packet right edges. - * (last_pktlen/rate) is "virtual" busy time, so that - * - * idle = (now - last) - last_pktlen/rate - */ - - idle = now - cl->last; - if ((unsigned long)idle > 128*1024*1024) { - avgidle = cl->maxidle; - } else { - idle -= L2T(cl, len); - - /* true_avgidle := (1-W)*true_avgidle + W*idle, - * where W=2^{-ewma_log}. But cl->avgidle is scaled: - * cl->avgidle == true_avgidle/W, - * hence: - */ - avgidle += idle - (avgidle>>cl->ewma_log); - } - - if (avgidle <= 0) { - /* Overlimit or at-limit */ - - if (avgidle < cl->minidle) - avgidle = cl->minidle; - - cl->avgidle = avgidle; - - /* Calculate expected time, when this class - * will be allowed to send. - * It will occur, when: - * (1-W)*true_avgidle + W*delay = 0, i.e. - * idle = (1/W - 1)*(-true_avgidle) - * or - * idle = (1 - W)*(-cl->avgidle); - */ - idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); - - /* - * That is not all. - * To maintain the rate allocated to the class, - * we add to undertime virtual clock, - * necessary to complete transmitted packet. - * (len/phys_bandwidth has been already passed - * to the moment of cbq_update) - */ - - idle -= L2T(&q->link, len); - idle += L2T(cl, len); - - cl->undertime = now + idle; - } else { - /* Underlimit */ - - cl->undertime = PSCHED_PASTPERFECT; - if (avgidle > cl->maxidle) - cl->avgidle = cl->maxidle; - else - cl->avgidle = avgidle; - } - if ((s64)(now - cl->last) > 0) - cl->last = now; - } - - cbq_update_toplevel(q, this, q->tx_borrowed); -} - -static inline struct cbq_class * -cbq_under_limit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *this_cl = cl; - - if (cl->tparent == NULL) - return cl; - - if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) { - cl->delayed = 0; - return cl; - } - - do { - /* It is very suspicious place. Now overlimit - * action is generated for not bounded classes - * only if link is completely congested. - * Though it is in agree with ancestor-only paradigm, - * it looks very stupid. Particularly, - * it means that this chunk of code will either - * never be called or result in strong amplification - * of burstiness. Dangerous, silly, and, however, - * no another solution exists. - */ - cl = cl->borrow; - if (!cl) { - this_cl->qstats.overlimits++; - cbq_overlimit(this_cl); - return NULL; - } - if (cl->level > q->toplevel) - return NULL; - } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime); - - cl->delayed = 0; - return cl; -} - -static inline struct sk_buff * -cbq_dequeue_prio(struct Qdisc *sch, int prio) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl_tail, *cl_prev, *cl; - struct sk_buff *skb; - int deficit; - - cl_tail = cl_prev = q->active[prio]; - cl = cl_prev->next_alive; - - do { - deficit = 0; - - /* Start round */ - do { - struct cbq_class *borrow = cl; - - if (cl->q->q.qlen && - (borrow = cbq_under_limit(cl)) == NULL) - goto skip_class; - - if (cl->deficit <= 0) { - /* Class exhausted its allotment per - * this round. Switch to the next one. - */ - deficit = 1; - cl->deficit += cl->quantum; - goto next_class; - } - - skb = cl->q->dequeue(cl->q); - - /* Class did not give us any skb :-( - * It could occur even if cl->q->q.qlen != 0 - * f.e. if cl->q == "tbf" - */ - if (skb == NULL) - goto skip_class; - - cl->deficit -= qdisc_pkt_len(skb); - q->tx_class = cl; - q->tx_borrowed = borrow; - if (borrow != cl) { -#ifndef CBQ_XSTATS_BORROWS_BYTES - borrow->xstats.borrows++; - cl->xstats.borrows++; -#else - borrow->xstats.borrows += qdisc_pkt_len(skb); - cl->xstats.borrows += qdisc_pkt_len(skb); -#endif - } - q->tx_len = qdisc_pkt_len(skb); - - if (cl->deficit <= 0) { - q->active[prio] = cl; - cl = cl->next_alive; - cl->deficit += cl->quantum; - } - return skb; - -skip_class: - if (cl->q->q.qlen == 0 || prio != cl->cpriority) { - /* Class is empty or penalized. - * Unlink it from active chain. - */ - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - /* Did cl_tail point to it? */ - if (cl == cl_tail) { - /* Repair it! */ - cl_tail = cl_prev; - - /* Was it the last class in this band? */ - if (cl == cl_tail) { - /* Kill the band! */ - q->active[prio] = NULL; - q->activemask &= ~(1<q->q.qlen) - cbq_activate_class(cl); - return NULL; - } - - q->active[prio] = cl_tail; - } - if (cl->q->q.qlen) - cbq_activate_class(cl); - - cl = cl_prev; - } - -next_class: - cl_prev = cl; - cl = cl->next_alive; - } while (cl_prev != cl_tail); - } while (deficit); - - q->active[prio] = cl_prev; - - return NULL; -} - -static inline struct sk_buff * -cbq_dequeue_1(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - unsigned int activemask; - - activemask = q->activemask & 0xFF; - while (activemask) { - int prio = ffz(~activemask); - activemask &= ~(1<tx_class) - cbq_update(q); - - q->now = now; - - for (;;) { - q->wd_expires = 0; - - skb = cbq_dequeue_1(sch); - if (skb) { - qdisc_bstats_update(sch, skb); - sch->q.qlen--; - return skb; - } - - /* All the classes are overlimit. - * - * It is possible, if: - * - * 1. Scheduler is empty. - * 2. Toplevel cutoff inhibited borrowing. - * 3. Root class is overlimit. - * - * Reset 2d and 3d conditions and retry. - * - * Note, that NS and cbq-2.0 are buggy, peeking - * an arbitrary class is appropriate for ancestor-only - * sharing, but not for toplevel algorithm. - * - * Our version is better, but slower, because it requires - * two passes, but it is unavoidable with top-level sharing. - */ - - if (q->toplevel == TC_CBQ_MAXLEVEL && - q->link.undertime == PSCHED_PASTPERFECT) - break; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->link.undertime = PSCHED_PASTPERFECT; - } - - /* No packets in scheduler or nobody wants to give them to us :-( - * Sigh... start watchdog timer in the last case. - */ - - if (sch->q.qlen) { - qdisc_qstats_overlimit(sch); - if (q->wd_expires) - qdisc_watchdog_schedule(&q->watchdog, - now + q->wd_expires); - } - return NULL; -} - -/* CBQ class maintenance routines */ - -static void cbq_adjust_levels(struct cbq_class *this) -{ - if (this == NULL) - return; - - do { - int level = 0; - struct cbq_class *cl; - - cl = this->children; - if (cl) { - do { - if (cl->level > level) - level = cl->level; - } while ((cl = cl->sibling) != this->children); - } - this->level = level + 1; - } while ((this = this->tparent) != NULL); -} - -static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) -{ - struct cbq_class *cl; - unsigned int h; - - if (q->quanta[prio] == 0) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - /* BUGGGG... Beware! This expression suffer of - * arithmetic overflows! - */ - if (cl->priority == prio) { - cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ - q->quanta[prio]; - } - if (cl->quantum <= 0 || - cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { - pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); - cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; - } - } - } -} - -static void cbq_sync_defmap(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *split = cl->split; - unsigned int h; - int i; - - if (split == NULL) - return; - - for (i = 0; i <= TC_PRIO_MAX; i++) { - if (split->defaults[i] == cl && !(cl->defmap & (1<defaults[i] = NULL; - } - - for (i = 0; i <= TC_PRIO_MAX; i++) { - int level = split->level; - - if (split->defaults[i]) - continue; - - for (h = 0; h < q->clhash.hashsize; h++) { - struct cbq_class *c; - - hlist_for_each_entry(c, &q->clhash.hash[h], - common.hnode) { - if (c->split == split && c->level < level && - c->defmap & (1<defaults[i] = c; - level = c->level; - } - } - } - } -} - -static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask) -{ - struct cbq_class *split = NULL; - - if (splitid == 0) { - split = cl->split; - if (!split) - return; - splitid = split->common.classid; - } - - if (split == NULL || split->common.classid != splitid) { - for (split = cl->tparent; split; split = split->tparent) - if (split->common.classid == splitid) - break; - } - - if (split == NULL) - return; - - if (cl->split != split) { - cl->defmap = 0; - cbq_sync_defmap(cl); - cl->split = split; - cl->defmap = def & mask; - } else - cl->defmap = (cl->defmap & ~mask) | (def & mask); - - cbq_sync_defmap(cl); -} - -static void cbq_unlink_class(struct cbq_class *this) -{ - struct cbq_class *cl, **clp; - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - - qdisc_class_hash_remove(&q->clhash, &this->common); - - if (this->tparent) { - clp = &this->sibling; - cl = *clp; - do { - if (cl == this) { - *clp = cl->sibling; - break; - } - clp = &cl->sibling; - } while ((cl = *clp) != this->sibling); - - if (this->tparent->children == this) { - this->tparent->children = this->sibling; - if (this->sibling == this) - this->tparent->children = NULL; - } - } else { - WARN_ON(this->sibling != this); - } -} - -static void cbq_link_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - struct cbq_class *parent = this->tparent; - - this->sibling = this; - qdisc_class_hash_insert(&q->clhash, &this->common); - - if (parent == NULL) - return; - - if (parent->children == NULL) { - parent->children = this; - } else { - this->sibling = parent->children->sibling; - parent->children->sibling = this; - } -} - -static void -cbq_reset(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int prio; - unsigned int h; - - q->activemask = 0; - q->pmask = 0; - q->tx_class = NULL; - q->tx_borrowed = NULL; - qdisc_watchdog_cancel(&q->watchdog); - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) - q->active[prio] = NULL; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - qdisc_reset(cl->q); - - cl->next_alive = NULL; - cl->undertime = PSCHED_PASTPERFECT; - cl->avgidle = cl->maxidle; - cl->deficit = cl->quantum; - cl->cpriority = cl->priority; - } - } -} - - -static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) -{ - if (lss->change & TCF_CBQ_LSS_FLAGS) { - cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; - cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; - } - if (lss->change & TCF_CBQ_LSS_EWMA) - cl->ewma_log = lss->ewma_log; - if (lss->change & TCF_CBQ_LSS_AVPKT) - cl->avpkt = lss->avpkt; - if (lss->change & TCF_CBQ_LSS_MINIDLE) - cl->minidle = -(long)lss->minidle; - if (lss->change & TCF_CBQ_LSS_MAXIDLE) { - cl->maxidle = lss->maxidle; - cl->avgidle = lss->maxidle; - } - if (lss->change & TCF_CBQ_LSS_OFFTIME) - cl->offtime = lss->offtime; -} - -static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]--; - q->quanta[cl->priority] -= cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]++; - q->quanta[cl->priority] += cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - - if (wrr->allot) - cl->allot = wrr->allot; - if (wrr->weight) - cl->weight = wrr->weight; - if (wrr->priority) { - cl->priority = wrr->priority - 1; - cl->cpriority = cl->priority; - if (cl->priority >= cl->priority2) - cl->priority2 = TC_CBQ_MAXPRIO - 1; - } - - cbq_addprio(q, cl); - return 0; -} - -static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) -{ - cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); - return 0; -} - -static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { - [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) }, - [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) }, - [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) }, - [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) }, - [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) }, - [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, - [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, -}; - -static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], - struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - int err; - - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, - cbq_policy, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_WRROPT]) { - const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); - - if (wrr->priority > TC_CBQ_MAXPRIO) { - NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); - err = -EINVAL; - } - } - return err; -} - -static int cbq_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct tc_ratespec *r; - int err; - - qdisc_watchdog_init(&q->watchdog, sch); - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) { - NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete"); - return -EINVAL; - } - - r = nla_data(tb[TCA_CBQ_RATE]); - - q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack); - if (!q->link.R_tab) - return -EINVAL; - - err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack); - if (err) - goto put_rtab; - - err = qdisc_class_hash_init(&q->clhash); - if (err < 0) - goto put_block; - - q->link.sibling = &q->link; - q->link.common.classid = sch->handle; - q->link.qdisc = sch; - q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (!q->link.q) - q->link.q = &noop_qdisc; - else - qdisc_hash_add(q->link.q, true); - - q->link.priority = TC_CBQ_MAXPRIO - 1; - q->link.priority2 = TC_CBQ_MAXPRIO - 1; - q->link.cpriority = TC_CBQ_MAXPRIO - 1; - q->link.allot = psched_mtu(qdisc_dev(sch)); - q->link.quantum = q->link.allot; - q->link.weight = q->link.R_tab->rate.rate; - - q->link.ewma_log = TC_CBQ_DEF_EWMA; - q->link.avpkt = q->link.allot/2; - q->link.minidle = -0x7FFFFFFF; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - cbq_link_class(&q->link); - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT])); - - cbq_addprio(q, &q->link); - return 0; - -put_block: - tcf_block_put(q->link.block); - -put_rtab: - qdisc_put_rtab(q->link.R_tab); - return err; -} - -static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - - if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_lssopt opt; - - opt.flags = 0; - if (cl->borrow == NULL) - opt.flags |= TCF_CBQ_LSS_BOUNDED; - if (cl->share == NULL) - opt.flags |= TCF_CBQ_LSS_ISOLATED; - opt.ewma_log = cl->ewma_log; - opt.level = cl->level; - opt.avpkt = cl->avpkt; - opt.maxidle = cl->maxidle; - opt.minidle = (u32)(-cl->minidle); - opt.offtime = cl->offtime; - opt.change = ~0; - if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_wrropt opt; - - memset(&opt, 0, sizeof(opt)); - opt.flags = 0; - opt.allot = cl->allot; - opt.priority = cl->priority + 1; - opt.cpriority = cl->cpriority + 1; - opt.weight = cl->weight; - if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_fopt opt; - - if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->common.classid : 0; - opt.defmap = cl->defmap; - opt.defchange = ~0; - if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt)) - goto nla_put_failure; - } - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) -{ - if (cbq_dump_lss(skb, cl) < 0 || - cbq_dump_rate(skb, cl) < 0 || - cbq_dump_wrr(skb, cl) < 0 || - cbq_dump_fopt(skb, cl) < 0) - return -1; - return 0; -} - -static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *nest; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, &q->link) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - q->link.xstats.avgidle = q->link.avgidle; - return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats)); -} - -static int -cbq_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - struct nlattr *nest; - - if (cl->tparent) - tcm->tcm_parent = cl->tparent->common.classid; - else - tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->common.classid; - tcm->tcm_info = cl->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, cl) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - __u32 qlen; - - cl->xstats.avgidle = cl->avgidle; - cl->xstats.undertime = 0; - qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); - - if (cl->undertime != PSCHED_PASTPERFECT) - cl->xstats.undertime = cl->undertime - q->now; - - if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; - - return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); -} - -static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old, struct netlink_ext_ack *extack) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack); - if (new == NULL) - return -ENOBUFS; - } - - *old = qdisc_replace(sch, new, &cl->q); - return 0; -} - -static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - return cl->q; -} - -static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cbq_deactivate_class(cl); -} - -static unsigned long cbq_find(struct Qdisc *sch, u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - return (unsigned long)cbq_class_lookup(q, classid); -} - -static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - WARN_ON(cl->filters); - - tcf_block_put(cl->block); - qdisc_put(cl->q); - qdisc_put_rtab(cl->R_tab); - gen_kill_estimator(&cl->rate_est); - if (cl != &q->link) - kfree(cl); -} - -static void cbq_destroy(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *next; - struct cbq_class *cl; - unsigned int h; - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = NULL; -#endif - /* - * Filters must be destroyed first because we don't destroy the - * classes from root to leafs which means that filters can still - * be bound to classes which have been destroyed already. --TGR '04 - */ - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - tcf_block_put(cl->block); - cl->block = NULL; - } - } - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], - common.hnode) - cbq_destroy_class(sch, cl); - } - qdisc_class_hash_destroy(&q->clhash); -} - -static int -cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg, struct netlink_ext_ack *extack) -{ - int err; - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)*arg; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct cbq_class *parent; - struct qdisc_rate_table *rtab = NULL; - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { - NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); - return -EOPNOTSUPP; - } - - if (cl) { - /* Check parent */ - if (parentid) { - if (cl->tparent && - cl->tparent->common.classid != parentid) { - NL_SET_ERR_MSG(extack, "Invalid parent id"); - return -EINVAL; - } - if (!cl->tparent && parentid != TC_H_ROOT) { - NL_SET_ERR_MSG(extack, "Parent must be root"); - return -EINVAL; - } - } - - if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), - tb[TCA_CBQ_RTAB], extack); - if (rtab == NULL) - return -EINVAL; - } - - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, - true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); - qdisc_put_rtab(rtab); - return err; - } - } - - /* Change class parameters */ - sch_tree_lock(sch); - - if (cl->next_alive != NULL) - cbq_deactivate_class(cl); - - if (rtab) { - qdisc_put_rtab(cl->R_tab); - cl->R_tab = rtab; - } - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - - if (tb[TCA_CBQ_WRROPT]) { - cbq_rmprio(q, cl); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - } - - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - - if (cl->q->q.qlen) - cbq_activate_class(cl); - - sch_tree_unlock(sch); - - return 0; - } - - if (parentid == TC_H_ROOT) - return -EINVAL; - - if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { - NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); - return -EINVAL; - } - - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], - extack); - if (rtab == NULL) - return -EINVAL; - - if (classid) { - err = -EINVAL; - if (TC_H_MAJ(classid ^ sch->handle) || - cbq_class_lookup(q, classid)) { - NL_SET_ERR_MSG(extack, "Specified class not found"); - goto failure; - } - } else { - int i; - classid = TC_H_MAKE(sch->handle, 0x8000); - - for (i = 0; i < 0x8000; i++) { - if (++q->hgenerator >= 0x8000) - q->hgenerator = 1; - if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) - break; - } - err = -ENOSR; - if (i >= 0x8000) { - NL_SET_ERR_MSG(extack, "Unable to generate classid"); - goto failure; - } - classid = classid|q->hgenerator; - } - - parent = &q->link; - if (parentid) { - parent = cbq_class_lookup(q, parentid); - err = -EINVAL; - if (!parent) { - NL_SET_ERR_MSG(extack, "Failed to find parentid"); - goto failure; - } - } - - err = -ENOBUFS; - cl = kzalloc(sizeof(*cl), GFP_KERNEL); - if (cl == NULL) - goto failure; - - gnet_stats_basic_sync_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); - goto failure; - } - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } - } - - cl->R_tab = rtab; - rtab = NULL; - cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - NULL); - if (!cl->q) - cl->q = &noop_qdisc; - else - qdisc_hash_add(cl->q, true); - - cl->common.classid = classid; - cl->tparent = parent; - cl->qdisc = sch; - cl->allot = parent->allot; - cl->quantum = cl->allot; - cl->weight = cl->R_tab->rate.rate; - - sch_tree_lock(sch); - cbq_link_class(cl); - cl->borrow = cl->tparent; - if (cl->tparent != &q->link) - cl->share = cl->tparent; - cbq_adjust_levels(parent); - cl->minidle = -0x7FFFFFFF; - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - if (cl->ewma_log == 0) - cl->ewma_log = q->link.ewma_log; - if (cl->maxidle == 0) - cl->maxidle = q->link.maxidle; - if (cl->avpkt == 0) - cl->avpkt = q->link.avpkt; - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); - - *arg = (unsigned long)cl; - return 0; - -failure: - qdisc_put_rtab(rtab); - return err; -} - -static int cbq_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl->filters || cl->children || cl == &q->link) - return -EBUSY; - - sch_tree_lock(sch); - - qdisc_purge_queue(cl->q); - - if (cl->next_alive) - cbq_deactivate_class(cl); - - if (q->tx_borrowed == cl) - q->tx_borrowed = q->tx_class; - if (q->tx_class == cl) { - q->tx_class = NULL; - q->tx_borrowed = NULL; - } -#ifdef CONFIG_NET_CLS_ACT - if (q->rx_class == cl) - q->rx_class = NULL; -#endif - - cbq_unlink_class(cl); - cbq_adjust_levels(cl->tparent); - cl->defmap = 0; - cbq_sync_defmap(cl); - - cbq_rmprio(q, cl); - sch_tree_unlock(sch); - - cbq_destroy_class(sch, cl); - return 0; -} - -static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl == NULL) - cl = &q->link; - - return cl->block; -} - -static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *p = (struct cbq_class *)parent; - struct cbq_class *cl = cbq_class_lookup(q, classid); - - if (cl) { - if (p && p->level <= cl->level) - return 0; - cl->filters++; - return (unsigned long)cl; - } - return 0; -} - -static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cl->filters--; -} - -static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - unsigned int h; - - if (arg->stop) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) - return; - } - } -} - -static const struct Qdisc_class_ops cbq_class_ops = { - .graft = cbq_graft, - .leaf = cbq_leaf, - .qlen_notify = cbq_qlen_notify, - .find = cbq_find, - .change = cbq_change_class, - .delete = cbq_delete, - .walk = cbq_walk, - .tcf_block = cbq_tcf_block, - .bind_tcf = cbq_bind_filter, - .unbind_tcf = cbq_unbind_filter, - .dump = cbq_dump_class, - .dump_stats = cbq_dump_class_stats, -}; - -static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &cbq_class_ops, - .id = "cbq", - .priv_size = sizeof(struct cbq_sched_data), - .enqueue = cbq_enqueue, - .dequeue = cbq_dequeue, - .peek = qdisc_peek_dequeued, - .init = cbq_init, - .reset = cbq_reset, - .destroy = cbq_destroy, - .change = NULL, - .dump = cbq_dump, - .dump_stats = cbq_dump_stats, - .owner = THIS_MODULE, -}; - -static int __init cbq_module_init(void) -{ - return register_qdisc(&cbq_qdisc_ops); -} -static void __exit cbq_module_exit(void) -{ - unregister_qdisc(&cbq_qdisc_ops); -} -module_init(cbq_module_init) -module_exit(cbq_module_exit) -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json deleted file mode 100644 index 1ab21c83a1223..0000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json +++ /dev/null @@ -1,184 +0,0 @@ -[ - { - "id": "3460", - "name": "Create CBQ with default setting", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "0592", - "name": "Create CBQ with mpu", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4684", - "name": "Create CBQ with valid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4345", - "name": "Create CBQ with invalid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4525", - "name": "Create CBQ with valid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6784", - "name": "Create CBQ with invalid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5468", - "name": "Delete CBQ with handle", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "492a", - "name": "Show CBQ class", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] -- GitLab From 09038f47e45cd5dbb02315db2134403a6b160ceb Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:12 -0500 Subject: [PATCH 0210/1418] net/sched: Retire ATM qdisc commit fb38306ceb9e770adfb5ffa6e3c64047b55f7a07 upstream. The ATM qdisc has served us well over the years but has not been getting much TLC due to lack of known users. Most recently it has become a shooting target for syzkaller. For this reason, we are retiring it. Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/Kconfig | 14 - net/sched/Makefile | 1 - net/sched/sch_atm.c | 706 ------------------ .../tc-testing/tc-tests/qdiscs/atm.json | 94 --- 4 files changed, 815 deletions(-) delete mode 100644 net/sched/sch_atm.c delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json diff --git a/net/sched/Kconfig b/net/sched/Kconfig index bbd84d8bc64ae..9bccbff826fb4 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -68,20 +68,6 @@ config NET_SCH_HFSC To compile this code as a module, choose M here: the module will be called sch_hfsc. -config NET_SCH_ATM - tristate "ATM Virtual Circuits (ATM)" - depends on ATM - help - Say Y here if you want to use the ATM pseudo-scheduler. This - provides a framework for invoking classifiers, which in turn - select classes of this queuing discipline. Each class maps - the flow(s) it is handling to a given virtual circuit. - - See the top of for more details. - - To compile this code as a module, choose M here: the - module will be called sch_atm. - config NET_SCH_PRIO tristate "Multi Band Priority Queueing (PRIO)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 0108d1bffa512..0c5762f5e07b4 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o -obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c deleted file mode 100644 index 4a981ca90b0bf..0000000000000 --- a/net/sched/sch_atm.c +++ /dev/null @@ -1,706 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for fput */ -#include -#include -#include - -/* - * The ATM queuing discipline provides a framework for invoking classifiers - * (aka "filters"), which in turn select classes of this queuing discipline. - * Each class maps the flow(s) it is handling to a given VC. Multiple classes - * may share the same VC. - * - * When creating a class, VCs are specified by passing the number of the open - * socket descriptor by which the calling process references the VC. The kernel - * keeps the VC open at least until all classes using it are removed. - * - * In this file, most functions are named atm_tc_* to avoid confusion with all - * the atm_* in net/atm. This naming convention differs from what's used in the - * rest of net/sched. - * - * Known bugs: - * - sometimes messes up the IP stack - * - any manipulations besides the few operations described in the README, are - * untested and likely to crash the system - * - should lock the flow while there is data in the queue (?) - */ - -#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - -struct atm_flow_data { - struct Qdisc_class_common common; - struct Qdisc *q; /* FIFO, TBF, etc. */ - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc, - struct sk_buff *skb); /* chaining */ - struct atm_qdisc_data *parent; /* parent qdisc */ - struct socket *sock; /* for closing */ - int ref; /* reference count */ - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct list_head list; - struct atm_flow_data *excess; /* flow for excess traffic; - NULL to set CLP instead */ - int hdr_len; - unsigned char hdr[]; /* header data; MUST BE LAST */ -}; - -struct atm_qdisc_data { - struct atm_flow_data link; /* unclassified skbs go here */ - struct list_head flows; /* NB: "link" is also on this - list */ - struct tasklet_struct task; /* dequeue tasklet */ -}; - -/* ------------------------- Class/flow operations ------------------------- */ - -static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - list_for_each_entry(flow, &p->flows, list) { - if (flow->common.classid == classid) - return flow; - } - return NULL; -} - -static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", - sch, p, flow, new, old); - if (list_empty(&flow->list)) - return -EINVAL; - if (!new) - new = &noop_qdisc; - *old = flow->q; - flow->q = new; - if (*old) - qdisc_reset(*old); - return 0; -} - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); - return flow ? flow->q : NULL; -} - -static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -/* - * atm_tc_put handles all destructions, including the ones that are explicitly - * requested (atm_tc_destroy, etc.). The assumption here is that we never drop - * anything that still seems to be in use. - */ -static void atm_tc_put(struct Qdisc *sch, unsigned long cl) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (--flow->ref) - return; - pr_debug("atm_tc_put: destroying\n"); - list_del_init(&flow->list); - pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_put(flow->q); - tcf_block_put(flow->block); - if (flow->sock) { - pr_debug("atm_tc_put: f_count %ld\n", - file_count(flow->sock->file)); - flow->vcc->pop = flow->old_pop; - sockfd_put(flow->sock); - } - if (flow->excess) - atm_tc_put(sch, (unsigned long)flow->excess); - if (flow != &p->link) - kfree(flow); - /* - * If flow == &p->link, the qdisc no longer works at this point and - * needs to be removed. (By the caller of atm_tc_put.) - */ -} - -static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - - pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); - VCC2FLOW(vcc)->old_pop(vcc, skb); - tasklet_schedule(&p->task); -} - -static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ - 0x00, 0x00, - 0x08, 0x00 -}; /* Ethertype IP (0800) */ - -static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = { - [TCA_ATM_FD] = { .type = NLA_U32 }, - [TCA_ATM_EXCESS] = { .type = NLA_U32 }, -}; - -static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)*arg; - struct atm_flow_data *excess = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_ATM_MAX + 1]; - struct socket *sock; - int fd, error, hdr_len; - void *hdr; - - pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); - /* - * The concept of parents doesn't apply for this qdisc. - */ - if (parent && parent != TC_H_ROOT && parent != sch->handle) - return -EINVAL; - /* - * ATM classes cannot be changed. In order to change properties of the - * ATM connection, that socket needs to be modified directly (via the - * native ATM API. In order to send a flow to a different VC, the old - * class needs to be removed and a new one added. (This may be changed - * later.) - */ - if (flow) - return -EBUSY; - if (opt == NULL) - return -EINVAL; - - error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, - NULL); - if (error < 0) - return error; - - if (!tb[TCA_ATM_FD]) - return -EINVAL; - fd = nla_get_u32(tb[TCA_ATM_FD]); - pr_debug("atm_tc_change: fd %d\n", fd); - if (tb[TCA_ATM_HDR]) { - hdr_len = nla_len(tb[TCA_ATM_HDR]); - hdr = nla_data(tb[TCA_ATM_HDR]); - } else { - hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ - } - if (!tb[TCA_ATM_EXCESS]) - excess = NULL; - else { - excess = (struct atm_flow_data *) - atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); - if (!excess) - return -ENOENT; - } - pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->nla_type, nla_len(opt), hdr_len); - sock = sockfd_lookup(fd, &error); - if (!sock) - return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); - if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { - error = -EPROTOTYPE; - goto err_out; - } - /* @@@ should check if the socket is really operational or we'll crash - on vcc->send */ - if (classid) { - if (TC_H_MAJ(classid ^ sch->handle)) { - pr_debug("atm_tc_change: classid mismatch\n"); - error = -EINVAL; - goto err_out; - } - } else { - int i; - unsigned long cl; - - for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_find(sch, classid); - if (!cl) - break; - } - } - pr_debug("atm_tc_change: new id %x\n", classid); - flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); - pr_debug("atm_tc_change: flow %p\n", flow); - if (!flow) { - error = -ENOBUFS; - goto err_out; - } - - error = tcf_block_get(&flow->block, &flow->filter_list, sch, - extack); - if (error) { - kfree(flow); - goto err_out; - } - - flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - extack); - if (!flow->q) - flow->q = &noop_qdisc; - pr_debug("atm_tc_change: qdisc %p\n", flow->q); - flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ - flow->vcc->user_back = flow; - pr_debug("atm_tc_change: vcc %p\n", flow->vcc); - flow->old_pop = flow->vcc->pop; - flow->parent = p; - flow->vcc->pop = sch_atm_pop; - flow->common.classid = classid; - flow->ref = 1; - flow->excess = excess; - list_add(&flow->list, &p->link.list); - flow->hdr_len = hdr_len; - if (hdr) - memcpy(flow->hdr, hdr, hdr_len); - else - memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); - *arg = (unsigned long)flow; - return 0; -err_out: - sockfd_put(sock); - return error; -} - -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (list_empty(&flow->list)) - return -EINVAL; - if (rcu_access_pointer(flow->filter_list) || flow == &p->link) - return -EBUSY; - /* - * Reference count must be 2: one for "keepalive" (set at class - * creation), and one for the reference held when calling delete. - */ - if (flow->ref < 2) { - pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); - return -EINVAL; - } - if (flow->ref > 2) - return -EBUSY; /* catch references via excess, etc. */ - atm_tc_put(sch, arg); - return 0; -} - -static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); - if (walker->stop) - return; - list_for_each_entry(flow, &p->flows, list) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) - break; - } -} - -static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? flow->block : p->link.block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - struct tcf_result res; - int result; - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - - pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - result = TC_ACT_OK; /* be nice to gcc */ - flow = NULL; - if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { - struct tcf_proto *fl; - - list_for_each_entry(flow, &p->flows, list) { - fl = rcu_dereference_bh(flow->filter_list); - if (fl) { - result = tcf_classify(skb, NULL, fl, &res, true); - if (result < 0) - continue; - if (result == TC_ACT_SHOT) - goto done; - - flow = (struct atm_flow_data *)res.class; - if (!flow) - flow = lookup_flow(sch, res.classid); - goto drop; - } - } - flow = NULL; -done: - ; - } - if (!flow) { - flow = &p->link; - } else { - if (flow->vcc) - ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - __qdisc_drop(skb, to_free); - goto drop; - case TC_ACT_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } -#endif - } - - ret = qdisc_enqueue(skb, flow->q, to_free); - if (ret != NET_XMIT_SUCCESS) { -drop: __maybe_unused - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - if (flow) - flow->qstats.drops++; - } - return ret; - } - /* - * Okay, this may seem weird. We pretend we've dropped the packet if - * it goes via ATM. The reason for this is that the outer qdisc - * expects to be able to q->dequeue the packet later on if we return - * success at this place. Also, sch->q.qdisc needs to reflect whether - * there is a packet egligible for dequeuing or not. Note that the - * statistics of the outer qdisc are necessarily wrong because of all - * this. There's currently no correct solution for this. - */ - if (flow == &p->link) { - sch->q.qlen++; - return NET_XMIT_SUCCESS; - } - tasklet_schedule(&p->task); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -/* - * Dequeue packets and send them over ATM. Note that we quite deliberately - * avoid checking net_device's flow control here, simply because sch_atm - * uses its own channels, which have nothing to do with any CLIP/LANE/or - * non-ATM interfaces. - */ - -static void sch_atm_dequeue(struct tasklet_struct *t) -{ - struct atm_qdisc_data *p = from_tasklet(p, t, task); - struct Qdisc *sch = qdisc_from_priv(p); - struct atm_flow_data *flow; - struct sk_buff *skb; - - pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - if (flow == &p->link) - continue; - /* - * If traffic is properly shaped, this won't generate nasty - * little bursts. Otherwise, it may ... (but that's okay) - */ - while ((skb = flow->q->ops->peek(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) - break; - - skb = qdisc_dequeue_peeked(flow->q); - if (unlikely(!skb)) - break; - - qdisc_bstats_update(sch, skb); - bstats_update(&flow->bstats, skb); - pr_debug("atm_tc_dequeue: sending on class %p\n", flow); - /* remove any LL header somebody else has attached */ - skb_pull(skb, skb_network_offset(skb)); - if (skb_headroom(skb) < flow->hdr_len) { - struct sk_buff *new; - - new = skb_realloc_headroom(skb, flow->hdr_len); - dev_kfree_skb(skb); - if (!new) - continue; - skb = new; - } - pr_debug("sch_atm_dequeue: ip %p, data %p\n", - skb_network_header(skb), skb->data); - ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb, flow->hdr_len), flow->hdr, - flow->hdr_len); - refcount_add(skb->truesize, - &sk_atm(flow->vcc)->sk_wmem_alloc); - /* atm.atm_options are already set by atm_tc_enqueue */ - flow->vcc->send(flow->vcc, skb); - } - } -} - -static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - - pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); - tasklet_schedule(&p->task); - skb = qdisc_dequeue_peeked(p->link.q); - if (skb) - sch->q.qlen--; - return skb; -} - -static struct sk_buff *atm_tc_peek(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - - pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); - - return p->link.q->ops->peek(p->link.q); -} - -static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_sync_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); - if (!p->link.q) - p->link.q = &noop_qdisc; - pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; - - err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, - extack); - if (err) - return err; - - tasklet_setup(&p->task, sch_atm_dequeue); - return 0; -} - -static void atm_tc_reset(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) - qdisc_reset(flow->q); -} - -static void atm_tc_destroy(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow, *tmp; - - pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - tcf_block_put(flow->block); - flow->block = NULL; - } - - list_for_each_entry_safe(flow, tmp, &p->flows, list) { - if (flow->ref > 1) - pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); - atm_tc_put(sch, (unsigned long)flow); - } - tasklet_kill(&p->task); -} - -static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct nlattr *nest; - - pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch, p, flow, skb, tcm); - if (list_empty(&flow->list)) - return -EINVAL; - tcm->tcm_handle = flow->common.classid; - tcm->tcm_info = flow->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr)) - goto nla_put_failure; - if (flow->vcc) { - struct sockaddr_atmpvc pvc; - int state; - - memset(&pvc, 0, sizeof(pvc)); - pvc.sap_family = AF_ATMPVC; - pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; - pvc.sap_addr.vpi = flow->vcc->vpi; - pvc.sap_addr.vci = flow->vcc->vci; - if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc)) - goto nla_put_failure; - state = ATM_VF2VS(flow->vcc->flags); - if (nla_put_u32(skb, TCA_ATM_STATE, state)) - goto nla_put_failure; - } - if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) - goto nla_put_failure; - } else { - if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) - goto nla_put_failure; - } - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} -static int -atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || - gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) - return -1; - - return 0; -} - -static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - return 0; -} - -static const struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .find = atm_tc_find, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_block = atm_tc_tcf_block, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, -}; - -static struct Qdisc_ops atm_qdisc_ops __read_mostly = { - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .peek = atm_tc_peek, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .dump = atm_tc_dump, - .owner = THIS_MODULE, -}; - -static int __init atm_init(void) -{ - return register_qdisc(&atm_qdisc_ops); -} - -static void __exit atm_exit(void) -{ - unregister_qdisc(&atm_qdisc_ops); -} - -module_init(atm_init) -module_exit(atm_exit) -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json deleted file mode 100644 index f5bc8670a67d1..0000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json +++ /dev/null @@ -1,94 +0,0 @@ -[ - { - "id": "7628", - "name": "Create ATM with default setting", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "390a", - "name": "Delete ATM with valid handle", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root atm" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "32a0", - "name": "Show ATM class", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class atm 1: parent 1:", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6310", - "name": "Dump ATM stats", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] -- GitLab From a41f6e170b852d2c68fc76012c49ec3332b0edcc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:13 -0500 Subject: [PATCH 0211/1418] net/sched: Retire dsmark qdisc commit bbe77c14ee6185a61ba6d5e435c1cbb489d2a9ed upstream. The dsmark qdisc has served us well over the years for diffserv but has not been getting much attention due to other more popular approaches to do diffserv services. Most recently it has become a shooting target for syzkaller. For this reason, we are retiring it. Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/Kconfig | 11 - net/sched/Makefile | 1 - net/sched/sch_dsmark.c | 518 ------------------ .../tc-testing/tc-tests/qdiscs/dsmark.json | 140 ----- 4 files changed, 670 deletions(-) delete mode 100644 net/sched/sch_dsmark.c delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9bccbff826fb4..9c4a80fce794f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -186,17 +186,6 @@ config NET_SCH_GRED To compile this code as a module, choose M here: the module will be called sch_gred. -config NET_SCH_DSMARK - tristate "Differentiated Services marker (DSMARK)" - help - Say Y if you want to schedule packets according to the - Differentiated Services architecture proposed in RFC 2475. - Technical information on this method, with pointers to associated - RFCs, is available at . - - To compile this code as a module, choose M here: the - module will be called sch_dsmark. - config NET_SCH_NETEM tristate "Network emulator (NETEM)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 0c5762f5e07b4..a66ac1e7b79b5 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -38,7 +38,6 @@ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o -obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c deleted file mode 100644 index 401ffaf87d622..0000000000000 --- a/net/sched/sch_dsmark.c +++ /dev/null @@ -1,518 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_dsmark.c - Differentiated Services field marker */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * classid class marking - * ------- ----- ------- - * n/a 0 n/a - * x:0 1 use entry [0] - * ... ... ... - * x:y y>0 y+1 use entry [y] - * ... ... ... - * x:indices-1 indices use entry [indices-1] - * ... ... ... - * x:y y+1 use entry [y & (indices-1)] - * ... ... ... - * 0xffff 0x10000 use entry [indices-1] - */ - - -#define NO_DEFAULT_INDEX (1 << 16) - -struct mask_value { - u8 mask; - u8 value; -}; - -struct dsmark_qdisc_data { - struct Qdisc *q; - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct mask_value *mv; - u16 indices; - u8 set_tc_index; - u32 default_index; /* index range is 0...0xffff */ -#define DSMARK_EMBEDDED_SZ 16 - struct mask_value embedded[DSMARK_EMBEDDED_SZ]; -}; - -static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) -{ - return index <= p->indices && index > 0; -} - -/* ------------------------- Class/flow operations ------------------------- */ - -static int dsmark_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", - __func__, sch, p, new, old); - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (new == NULL) - new = &noop_qdisc; - } - - *old = qdisc_replace(sch, new, &p->q); - return 0; -} - -static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - return p->q; -} - -static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) -{ - return TC_H_MIN(classid) + 1; -} - -static unsigned long dsmark_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - - return dsmark_find(sch, classid); -} - -static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) -{ -} - -static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { - [TCA_DSMARK_INDICES] = { .type = NLA_U16 }, - [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 }, - [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG }, - [TCA_DSMARK_MASK] = { .type = NLA_U8 }, - [TCA_DSMARK_VALUE] = { .type = NLA_U8 }, -}; - -static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - - pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", - __func__, sch, p, classid, parent, *arg); - - if (!dsmark_valid_index(p, *arg)) { - err = -ENOENT; - goto errout; - } - - if (!opt) - goto errout; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - if (tb[TCA_DSMARK_VALUE]) - p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); - - if (tb[TCA_DSMARK_MASK]) - p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - - err = 0; - -errout: - return err; -} - -static int dsmark_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - if (!dsmark_valid_index(p, arg)) - return -EINVAL; - - p->mv[arg - 1].mask = 0xff; - p->mv[arg - 1].value = 0; - - return 0; -} - -static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int i; - - pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", - __func__, sch, p, walker); - - if (walker->stop) - return; - - for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) { - walker->count++; - continue; - } - if (!tc_qdisc_stats_dump(sch, i + 1, walker)) - break; - } -} - -static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - return p->block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - unsigned int len = qdisc_pkt_len(skb); - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); - - if (p->set_tc_index) { - int wlen = skb_network_offset(skb); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) - & ~INET_ECN_MASK; - break; - - case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) - & ~INET_ECN_MASK; - break; - default: - skb->tc_index = 0; - break; - } - } - - if (TC_H_MAJ(skb->priority) == sch->handle) - skb->tc_index = TC_H_MIN(skb->priority); - else { - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, NULL, fl, &res, false); - - pr_debug("result %d class 0x%04x\n", result, res.classid); - - switch (result) { -#ifdef CONFIG_NET_CLS_ACT - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - - case TC_ACT_SHOT: - goto drop; -#endif - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; - } - } - - err = qdisc_enqueue(skb, p->q, to_free); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - qdisc_qstats_drop(sch); - return err; - } - - sch->qstats.backlog += len; - sch->q.qlen++; - - return NET_XMIT_SUCCESS; - -drop: - qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - u32 index; - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - skb = qdisc_dequeue_peeked(p->q); - if (skb == NULL) - return NULL; - - qdisc_bstats_update(sch, skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - - index = skb->tc_index & (p->indices - 1); - pr_debug("index %d->%d\n", skb->tc_index, index); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - default: - /* - * Only complain if a change was actually attempted. - * This way, we can send non-IP traffic through dsmark - * and don't need yet another qdisc as a bypass. - */ - if (p->mv[index].mask != 0xff || p->mv[index].value) - pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb_protocol(skb, true))); - break; - } - - return skb; -} - -static struct sk_buff *dsmark_peek(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - return p->q->ops->peek(p->q); -} - -static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - u32 default_index = NO_DEFAULT_INDEX; - u16 indices; - int i; - - pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); - - if (!opt) - goto errout; - - err = tcf_block_get(&p->block, &p->filter_list, sch, extack); - if (err) - return err; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - err = -EINVAL; - if (!tb[TCA_DSMARK_INDICES]) - goto errout; - indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); - - if (hweight32(indices) != 1) - goto errout; - - if (tb[TCA_DSMARK_DEFAULT_INDEX]) - default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - - if (indices <= DSMARK_EMBEDDED_SZ) - p->mv = p->embedded; - else - p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); - if (!p->mv) { - err = -ENOMEM; - goto errout; - } - for (i = 0; i < indices; i++) { - p->mv[i].mask = 0xff; - p->mv[i].value = 0; - } - p->indices = indices; - p->default_index = default_index; - p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - - p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, - NULL); - if (p->q == NULL) - p->q = &noop_qdisc; - else - qdisc_hash_add(p->q, true); - - pr_debug("%s: qdisc %p\n", __func__, p->q); - - err = 0; -errout: - return err; -} - -static void dsmark_reset(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - if (p->q) - qdisc_reset(p->q); -} - -static void dsmark_destroy(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - tcf_block_put(p->block); - qdisc_put(p->q); - if (p->mv != p->embedded) - kfree(p->mv); -} - -static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); - - if (!dsmark_valid_index(p, cl)) - return -EINVAL; - - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); - tcm->tcm_info = p->q->handle; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) - goto nla_put_failure; - - if (p->default_index != NO_DEFAULT_INDEX && - nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index)) - goto nla_put_failure; - - if (p->set_tc_index && - nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static const struct Qdisc_class_ops dsmark_class_ops = { - .graft = dsmark_graft, - .leaf = dsmark_leaf, - .find = dsmark_find, - .change = dsmark_change, - .delete = dsmark_delete, - .walk = dsmark_walk, - .tcf_block = dsmark_tcf_block, - .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_unbind_filter, - .dump = dsmark_dump_class, -}; - -static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &dsmark_class_ops, - .id = "dsmark", - .priv_size = sizeof(struct dsmark_qdisc_data), - .enqueue = dsmark_enqueue, - .dequeue = dsmark_dequeue, - .peek = dsmark_peek, - .init = dsmark_init, - .reset = dsmark_reset, - .destroy = dsmark_destroy, - .change = NULL, - .dump = dsmark_dump, - .owner = THIS_MODULE, -}; - -static int __init dsmark_module_init(void) -{ - return register_qdisc(&dsmark_qdisc_ops); -} - -static void __exit dsmark_module_exit(void) -{ - unregister_qdisc(&dsmark_qdisc_ops); -} - -module_init(dsmark_module_init) -module_exit(dsmark_module_exit) - -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json deleted file mode 100644 index c030795f9c37d..0000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json +++ /dev/null @@ -1,140 +0,0 @@ -[ - { - "id": "6345", - "name": "Create DSMARK with default setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "3462", - "name": "Create DSMARK with default_index setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "ca95", - "name": "Create DSMARK with set_tc_index flag", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "a950", - "name": "Create DSMARK with multiple setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4092", - "name": "Delete DSMARK with handle", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5930", - "name": "Show DSMARK class", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class dsmark 1:", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] -- GitLab From 89bebf2753115fed2626402d9049436a3e7ba616 Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Mon, 2 Oct 2023 13:55:51 +0200 Subject: [PATCH 0212/1418] sched/rt: Disallow writing invalid values to sched_rt_period_us commit 079be8fc630943d9fc70a97807feb73d169ee3fc upstream. The validation of the value written to sched_rt_period_us was broken because: - the sysclt_sched_rt_period is declared as unsigned int - parsed by proc_do_intvec() - the range is asserted after the value parsed by proc_do_intvec() Because of this negative values written to the file were written into a unsigned integer that were later on interpreted as large positive integers which did passed the check: if (sysclt_sched_rt_period <= 0) return EINVAL; This commit fixes the parsing by setting explicit range for both perid_us and runtime_us into the sched_rt_sysctls table and processes the values with proc_dointvec_minmax() instead. Alternatively if we wanted to use full range of unsigned int for the period value we would have to split the proc_handler and use proc_douintvec() for it however even the Documentation/scheduller/sched-rt-group.rst describes the range as 1 to INT_MAX. As far as I can tell the only problem this causes is that the sysctl file allows writing negative values which when read back may confuse userspace. There is also a LTP test being submitted for these sysctl files at: http://patchwork.ozlabs.org/project/ltp/patch/20230901144433.2526-1-chrubis@suse.cz/ Signed-off-by: Cyril Hrubis Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231002115553.3007-2-chrubis@suse.cz Cc: Mahmoud Adam Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 76bafa8d331a7..8d5c77569fbb0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -37,6 +37,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rt_runtime_us", @@ -44,6 +46,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_NEG_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rr_timeslice_ms", @@ -2970,9 +2974,6 @@ static int sched_rt_global_constraints(void) #ifdef CONFIG_SYSCTL static int sched_rt_global_validate(void) { - if (sysctl_sched_rt_period <= 0) - return -EINVAL; - if ((sysctl_sched_rt_runtime != RUNTIME_INF) && ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) || ((u64)sysctl_sched_rt_runtime * @@ -3003,7 +3004,7 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_validate(); -- GitLab From 5552b7bf26d9ad3e1ded3ffd0caeb63257a3168f Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Wed, 2 Aug 2023 17:19:06 +0200 Subject: [PATCH 0213/1418] sched/rt: sysctl_sched_rr_timeslice show default timeslice after reset commit c1fc6484e1fb7cc2481d169bfef129a1b0676abe upstream. The sched_rr_timeslice can be reset to default by writing value that is <= 0. However after reading from this file we always got the last value written, which is not useful at all. $ echo -1 > /proc/sys/kernel/sched_rr_timeslice_ms $ cat /proc/sys/kernel/sched_rr_timeslice_ms -1 Fix this by setting the variable that holds the sysctl file value to the jiffies_to_msecs(RR_TIMESLICE) in case that <= 0 value was written. Signed-off-by: Cyril Hrubis Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Petr Vorel Acked-by: Mel Gorman Tested-by: Petr Vorel Cc: Mahmoud Adam Link: https://lore.kernel.org/r/20230802151906.25258-3-chrubis@suse.cz Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8d5c77569fbb0..3a2335bc1d58b 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -3048,6 +3048,9 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, sched_rr_timeslice = sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE : msecs_to_jiffies(sysctl_sched_rr_timeslice); + + if (sysctl_sched_rr_timeslice <= 0) + sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE); } mutex_unlock(&mutex); -- GitLab From 6967ddd378e9d63f5c59dbace8b2cf963d6d6a0d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:40:37 +0300 Subject: [PATCH 0214/1418] PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5d1b4b46f856da1473c7ba9a5cdfcb55c9b2478 upstream. The "msg_addr" variable is u64. However, the "aligned_offset" is an unsigned int. This means that when the code does: msg_addr &= ~aligned_offset; it will unintentionally zero out the high 32 bits. Use ALIGN_DOWN() to do the alignment instead. Fixes: 2217fffcd63f ("PCI: dwc: endpoint: Fix dw_pcie_ep_raise_msix_irq() alignment support") Link: https://lore.kernel.org/r/af59c7ad-ab93-40f7-ad4a-7ac0b14d37f5@moroto.mountain Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam Cc: Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-designware-ep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 59c164b5c64aa..4086a7818981a 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -6,6 +6,7 @@ * Author: Kishon Vijay Abraham I */ +#include #include #include @@ -600,7 +601,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, } aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr &= ~aligned_offset; + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) -- GitLab From 0d27ac1779092446afb6e21666c70b7b8627d512 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 12 Jan 2024 19:37:29 +0100 Subject: [PATCH 0215/1418] riscv/efistub: Ensure GP-relative addressing is not used commit afb2a4fb84555ef9e61061f6ea63ed7087b295d5 upstream. The cflags for the RISC-V efistub were missing -mno-relax, thus were under the risk that the compiler could use GP-relative addressing. That happened for _edata with binutils-2.41 and kernel 6.1, causing the relocation to fail due to an invalid kernel_size in handle_kernel_image. It was not yet observed with newer versions, but that may just be luck. Cc: Signed-off-by: Jan Kiszka Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef5045a53ce09..b6e1dcb98a64c 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -25,7 +25,7 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ - -fpic + -fpic -mno-relax cflags-$(CONFIG_LOONGARCH) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fpie -- GitLab From c9ae228cfd1a4e292eb9680744a40a56299aa42f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 29 Oct 2023 18:07:04 +0100 Subject: [PATCH 0216/1418] dmaengine: apple-admac: Keep upper bits of REG_BUS_WIDTH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 306f5df81fcc89b462fbeb9dbe26d9a8ad7c7582 ] For RX channels, REG_BUS_WIDTH seems to default to a value of 0xf00, and macOS preserves the upper bits when setting the configuration in the lower ones. If we reset the upper bits to 0, this causes framing errors on suspend/resume (the data stream "tears" and channels get swapped around). Keeping the upper bits untouched, like the macOS driver does, fixes this issue. Signed-off-by: Hector Martin Reviewed-by: Martin Povišer Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20231029170704.82238-1-povik+lin@cutebit.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/apple-admac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 4cf8da77bdd91..cac4532fe23a9 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -56,6 +56,8 @@ #define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) +#define BUS_WIDTH_WORD_SIZE GENMASK(3, 0) +#define BUS_WIDTH_FRAME_SIZE GENMASK(7, 4) #define BUS_WIDTH_8BIT 0x00 #define BUS_WIDTH_16BIT 0x01 #define BUS_WIDTH_32BIT 0x02 @@ -739,7 +741,8 @@ static int admac_device_config(struct dma_chan *chan, struct admac_data *ad = adchan->host; bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; int wordsize = 0; - u32 bus_width = 0; + u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) & + ~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE); switch (is_tx ? config->dst_addr_width : config->src_addr_width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: -- GitLab From e717bd412001495f17400bfc09f606f1b594ef5a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Thu, 11 Jan 2024 15:59:41 +0300 Subject: [PATCH 0217/1418] scsi: target: core: Add TMF to tmr_list handling [ Upstream commit 83ab68168a3d990d5ff39ab030ad5754cbbccb25 ] An abort that is responded to by iSCSI itself is added to tmr_list but does not go to target core. A LUN_RESET that goes through tmr_list takes a refcounter on the abort and waits for completion. However, the abort will be never complete because it was not started in target core. Unable to locate ITT: 0x05000000 on CID: 0 Unable to locate RefTaskTag: 0x05000000 on CID: 0. wait_for_tasks: Stopping tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop wait for tasks: tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop ... INFO: task kworker/0:2:49 blocked for more than 491 seconds. task:kworker/0:2 state:D stack: 0 pid: 49 ppid: 2 flags:0x00000800 Workqueue: events target_tmr_work [target_core_mod] Call Trace: __switch_to+0x2c4/0x470 _schedule+0x314/0x1730 schedule+0x64/0x130 schedule_timeout+0x168/0x430 wait_for_completion+0x140/0x270 target_put_cmd_and_wait+0x64/0xb0 [target_core_mod] core_tmr_lun_reset+0x30/0xa0 [target_core_mod] target_tmr_work+0xc8/0x1b0 [target_core_mod] process_one_work+0x2d4/0x5d0 worker_thread+0x78/0x6c0 To fix this, only add abort to tmr_list if it will be handled by target core. Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20240111125941.8688-1-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 5 ----- drivers/target/target_core_transport.c | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 301fe376a1206..13558cbd9b82e 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0686882bcbda3..fb93d74c5d0b2 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3627,6 +3627,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true; -- GitLab From b2cb83539cdb54ed693e28e8569e54c49ffb6d56 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 17 Jan 2024 05:55:39 +0000 Subject: [PATCH 0218/1418] cifs: open_cached_dir should not rely on primary channel [ Upstream commit 936eba9cfb5cfbf6a2c762cd163605f2b784e03e ] open_cached_dir today selects ses->server a.k.a primary channel to send requests. When multichannel is used, the primary channel maybe down. So it does not make sense to rely only on that channel. This fix makes this function pick a channel with the standard helper function cifs_pick_channel. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cached_dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 6f4d7aa70e5a2..fd082151c5f9b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -149,7 +149,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -EOPNOTSUPP; ses = tcon->ses; - server = ses->server; + server = cifs_pick_channel(ses); cfids = tcon->cfids; if (!server->ops->new_lease_key) -- GitLab From 8dda42b1f2e40b7fdd6a039de8558448b9016748 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 0219/1418] dmaengine: shdma: increase size of 'dev_id' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 404290240827c3bb5c4e195174a8854eef2f89ac ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'dev_id' drivers/dma/sh/shdmac.c: In function ‘sh_dmae_probe’: drivers/dma/sh/shdmac.c:541:34: error: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size 9 [-Werror=format-truncation=] 541 | "sh-dmae%d.%d", pdev->id, id); | ^~ In function ‘sh_dmae_chan_probe’, inlined from ‘sh_dmae_probe’ at drivers/dma/sh/shdmac.c:845:9: drivers/dma/sh/shdmac.c:541:26: note: directive argument in the range [0, 2147483647] 541 | "sh-dmae%d.%d", pdev->id, id); | ^~~~~~~~~~~~~~ drivers/dma/sh/shdmac.c:541:26: note: directive argument in the range [0, 19] drivers/dma/sh/shdmac.c:540:17: note: ‘snprintf’ output between 11 and 21 bytes into a destination of size 16 540 | snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 541 | "sh-dmae%d.%d", pdev->id, id); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/sh/shdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 9c121a4b33ad8..f97d80343aea4 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -25,7 +25,7 @@ struct sh_dmae_chan { const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ int xmit_shift; /* log_2(bytes_per_xfer) */ void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[32]; /* unique name per DMAC of channel */ int pm_error; dma_addr_t slave_addr; }; -- GitLab From d3dbfb9d11fceb8bda2a4f0e5d44ae7363ca9bd1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 0220/1418] dmaengine: fsl-qdma: increase size of 'irq_name' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6386f6c995b3ab91c72cfb76e4465553c555a8da ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'irq_name' drivers/dma/fsl-qdma.c: In function ‘fsl_qdma_irq_init’: drivers/dma/fsl-qdma.c:824:46: error: ‘%d’ directive writing between 1 and 11 bytes into a region of size 10 [-Werror=format-overflow=] 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~ drivers/dma/fsl-qdma.c:824:35: note: directive argument in the range [-2147483641, 2147483646] 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~~~~~~~~~~~~~ drivers/dma/fsl-qdma.c:824:17: note: ‘sprintf’ output between 12 and 22 bytes into a destination of size 20 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/fsl-qdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 69385f32e2756..f383f219ed008 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -805,7 +805,7 @@ fsl_qdma_irq_init(struct platform_device *pdev, int i; int cpu; int ret; - char irq_name[20]; + char irq_name[32]; fsl_qdma->error_irq = platform_get_irq_byname(pdev, "qdma-error"); -- GitLab From e540c44983871b27db2a2d16bb9b25e5ed7cc748 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 16 Jan 2024 14:22:57 +0000 Subject: [PATCH 0221/1418] wifi: cfg80211: fix missing interfaces when dumping [ Upstream commit a6e4f85d3820d00694ed10f581f4c650445dbcda ] The nl80211_dump_interface() supports resumption in case nl80211_send_iface() doesn't have the resources to complete its work. The logic would store the progress as iteration offsets for rdev and wdev loops. However the logic did not properly handle resumption for non-last rdev. Assuming a system with 2 rdevs, with 2 wdevs each, this could happen: dump(cb=[0, 0]): if_start=cb[1] (=0) send rdev0.wdev0 -> ok send rdev0.wdev1 -> yield cb[1] = 1 dump(cb=[0, 1]): if_start=cb[1] (=1) send rdev0.wdev1 -> ok // since if_start=1 the rdev0.wdev0 got skipped // through if_idx < if_start send rdev1.wdev1 -> ok The if_start needs to be reset back to 0 upon wdev loop end. The problem is actually hard to hit on a desktop, and even on most routers. The prerequisites for this manifesting was: - more than 1 wiphy - a few handful of interfaces - dump without rdev or wdev filter I was seeing this with 4 wiphys 9 interfaces each. It'd miss 6 interfaces from the last wiphy reported to userspace. Signed-off-by: Michal Kazior Link: https://msgid.link/20240116142340.89678-1-kazikcz@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 70fb14b8bab07..c259d3227a9e2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3960,6 +3960,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; } + if_start = 0; wp_idx++; } out: -- GitLab From eb39bb548bf974acad7bd6780fe11f9e6652d696 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 4 Jan 2024 19:10:59 +0100 Subject: [PATCH 0222/1418] wifi: mac80211: fix race condition on enabling fast-xmit [ Upstream commit bcbc84af1183c8cf3d1ca9b78540c2185cd85e7f ] fast-xmit must only be enabled after the sta has been uploaded to the driver, otherwise it could end up passing the not-yet-uploaded sta via drv_tx calls to the driver, leading to potential crashes because of uninitialized drv_priv data. Add a missing sta->uploaded check and re-check fast xmit after inserting a sta. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240104181059.84032-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/sta_info.c | 2 ++ net/mac80211/tx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f3d6c3e4c970e..bd56015b29258 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -891,6 +891,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 322a035f75929..3d62e8b718740 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3044,7 +3044,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || -- GitLab From 070398d32c5f3ab0e890374904ad94551c76aec4 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 11:49:40 +0800 Subject: [PATCH 0223/1418] fbdev: savage: Error out if pixclock equals zero [ Upstream commit 04e5eac8f3ab2ff52fa191c187a46d4fdbc1e288 ] The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. Although pixclock is checked in savagefb_decode_var(), but it is not checked properly in savagefb_probe(). Fix this by checking whether pixclock is zero in the function savagefb_check_var() before info->var.pixclock is used as the divisor. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/savage/savagefb_driver.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index b7818b652698f..a7b63c475f954 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch (var->bits_per_pixel) { -- GitLab From f329523f6a65c3bbce913ad35473d83a319d5d99 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 14:24:43 +0800 Subject: [PATCH 0224/1418] fbdev: sis: Error out if pixclock equals zero [ Upstream commit e421946be7d9bf545147bea8419ef8239cb7ca52 ] The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. In sisfb_check_var(), var->pixclock is used as a divisor to caculate drate before it is checked against zero. Fix this by checking it at the beginning. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/sis/sis_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 1c197c3f95381..fe8996461b9ef 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1475,6 +1475,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { -- GitLab From f19361d570c67e7e014896fa2dacd7d721bf0aa8 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 23 Jan 2024 15:11:49 +0800 Subject: [PATCH 0225/1418] spi: hisi-sfc-v3xx: Return IRQ_NONE if no interrupts were detected [ Upstream commit de8b6e1c231a95abf95ad097b993d34b31458ec9 ] Return IRQ_NONE from the interrupt handler when no interrupt was detected. Because an empty interrupt will cause a null pointer error: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: complete+0x54/0x100 hisi_sfc_v3xx_isr+0x2c/0x40 [spi_hisi_sfc_v3xx] __handle_irq_event_percpu+0x64/0x1e0 handle_irq_event+0x7c/0x1cc Signed-off-by: Devyn Liu Link: https://msgid.link/r/20240123071149.917678-1-liudingyuan@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-hisi-sfc-v3xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index d3a23b1c2a4c5..61bf00dfe9c33 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); -- GitLab From 8fc80874103a5c20aebdc2401361aa01c817f75b Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:26:34 +0100 Subject: [PATCH 0226/1418] block: Fix WARNING in _copy_from_iter [ Upstream commit 13f3956eb5681a4045a8dfdef48df5dc4d9f58a6 ] Syzkaller reports a warning in _copy_from_iter because an iov_iter is supposedly used in the wrong direction. The reason is that syzcaller managed to generate a request with a transfer direction of SG_DXFER_TO_FROM_DEV. This instructs the kernel to copy user buffers into the kernel, read into the copied buffers and then copy the data back to user space. Thus the iovec is used in both directions. Detect this situation in the block layer and construct a new iterator with the correct direction for the copy-in. Reported-by: syzbot+a532b03fdfee2c137666@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000009b92c10604d7a5e9@google.com/t/ Reported-by: syzbot+63dec323ac56c28e644f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000003faaa105f6e7c658@google.com/T/ Signed-off-by: Christian A. Ehrhardt Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240121202634.275068-1-lk@c--e.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-map.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 66da9e2b19abf..b337ae347bfa3 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -203,12 +203,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); -- GitLab From bba595eb1422749ad9aea7e98991fa9db04bb26d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Jan 2024 15:47:34 -0800 Subject: [PATCH 0227/1418] smb: Work around Clang __bdos() type confusion [ Upstream commit 8deb05c84b63b4fdb8549e08942867a68924a5b8 ] Recent versions of Clang gets confused about the possible size of the "user" allocation, and CONFIG_FORTIFY_SOURCE ends up emitting a warning[1]: repro.c:126:4: warning: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 126 | __write_overflow_field(p_size_field, size); | ^ for this memset(): int len; __le16 *user; ... len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); ... if (len) { ... } else { memset(user, '\0', 2); } While Clang works on this bug[2], switch to using a direct assignment, which avoids memset() entirely which both simplifies the code and silences the false positive warning. (Making "len" size_t also silences the warning, but the direct assignment seems better.) Reported-by: Nathan Chancellor Closes: https://github.com/ClangBuiltLinux/linux/issues/1966 [1] Link: https://github.com/llvm/llvm-project/issues/77813 [2] Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsencrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index d0ac2648c0d61..d3d4cf6321fd5 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -444,7 +444,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, -- GitLab From a2aa77b5d8e3f521a893de21bda5d823649d1c09 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:45 +0000 Subject: [PATCH 0228/1418] cifs: translate network errors on send to -ECONNABORTED [ Upstream commit a68106a6928e0a6680f12bcc7338c0dddcfe4d11 ] When the network stack returns various errors, we today bubble up the error to the user (in case of soft mounts). This change translates all network errors except -EINTR and -EAGAIN to -ECONNABORTED. A similar approach is taken when we receive network errors when reading from the socket. The change also forces the cifsd thread to reconnect during it's next activity. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/transport.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8a1dd8407a3a7..97bf46de8e429 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -427,10 +427,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); -- GitLab From 41e137c2c75aed1b5f8595f2b2f8970539ba56bf Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Tue, 23 Jan 2024 19:30:02 +0100 Subject: [PATCH 0229/1418] ahci: asm1166: correct count of reported ports [ Upstream commit 0077a504e1a4468669fd2e011108db49133db56e ] The ASM1166 SATA host controller always reports wrongly, that it has 32 ports. But in reality, it only has six ports. This seems to be a hardware issue, as all tested ASM1166 SATA host controllers reports such high count of ports. Example output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0xffffff3f impl SATA mode. By adjusting the port_map, the count is limited to six ports. New output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0x3f impl SATA mode. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=211873 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218346 Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 805645efb3ccf..e22124f42183f 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -658,6 +658,11 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + } + if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; -- GitLab From 2d623c94fbba3554f4446ba6f3c764994e8b0d26 Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Wed, 24 Jan 2024 10:24:36 +0300 Subject: [PATCH 0230/1418] aoe: avoid potential deadlock at set_capacity [ Upstream commit e169bd4fb2b36c4b2bee63c35c740c85daeb2e86 ] Move set_capacity() outside of the section procected by (&d->lock). To avoid possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- [1] lock(&bdev->bd_size_lock); local_irq_disable(); [2] lock(&d->lock); [3] lock(&bdev->bd_size_lock); [4] lock(&d->lock); *** DEADLOCK *** Where [1](&bdev->bd_size_lock) hold by zram_add()->set_capacity(). [2]lock(&d->lock) hold by aoeblk_gdalloc(). And aoeblk_gdalloc() is trying to acquire [3](&bdev->bd_size_lock) at set_capacity() call. In this situation an attempt to acquire [4]lock(&d->lock) from aoecmd_cfg_rsp() will lead to deadlock. So the simplest solution is breaking lock dependency [2](&d->lock) -> [3](&bdev->bd_size_lock) by moving set_capacity() outside. Signed-off-by: Maksim Kiselev Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240124072436.3745720-2-bigunclemax@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/aoe/aoeblk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 128722cf6c3ca..827802e418dd3 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; -- GitLab From f48a6eb2e5e88c7548a8f20f6ad8131b06427655 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 25 Jan 2024 17:04:01 +0200 Subject: [PATCH 0231/1418] ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers [ Upstream commit 20730e9b277873deeb6637339edcba64468f3da3 ] With one of the on-board ASM1061 AHCI controllers (1b21:0612) on an ASUSTeK Pro WS WRX80E-SAGE SE WIFI mainboard, a controller hang was observed that was immediately preceded by the following kernel messages: ahci 0000:28:00.0: Using 64-bit DMA addresses ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00000 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00300 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00380 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00400 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00680 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00700 flags=0x0000] The first message is produced by code in drivers/iommu/dma-iommu.c which is accompanied by the following comment that seems to apply: /* * Try to use all the 32-bit PCI addresses first. The original SAC vs. * DAC reasoning loses relevance with PCIe, but enough hardware and * firmware bugs are still lurking out there that it's safest not to * venture into the 64-bit space until necessary. * * If your device goes wrong after seeing the notice then likely either * its driver is not setting DMA masks accurately, the hardware has * some inherent bug in handling >32-bit addresses, or not all the * expected address bits are wired up between the device and the IOMMU. */ Asking the ASM1061 on a discrete PCIe card to DMA from I/O virtual address 0xffffffff00000000 produces the following I/O page faults: vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000000 flags=0x0010] vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000500 flags=0x0010] Note that the upper 21 bits of the logged DMA address are zero. (When asking a different PCIe device in the same PCIe slot to DMA to the same I/O virtual address, we do see all the upper 32 bits of the DMA address as 1, so this is not an issue with the chipset or IOMMU configuration on the test system.) Also, hacking libahci to always set the upper 21 bits of all DMA addresses to 1 produces no discernible effect on the behavior of the ASM1061, and mkfs/mount/scrub/etc work as without this hack. This all strongly suggests that the ASM1061 has a 43 bit DMA address limit, and this commit therefore adds a quirk to deal with this limit. This issue probably applies to (some of) the other supported ASMedia parts as well, but we limit it to the PCI IDs known to refer to ASM1061 parts, as that's the only part we know for sure to be affected by this issue at this point. Link: https://lore.kernel.org/linux-ide/ZaZ2PIpEId-rl6jv@wantstofly.org/ Signed-off-by: Lennert Buytenhek [cassel: drop date from error messages in commit log] Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 29 +++++++++++++++++++++++------ drivers/ata/ahci.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e22124f42183f..42c6b660550c2 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -49,6 +49,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -129,6 +130,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -597,11 +605,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ + /* ASMedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ @@ -949,11 +957,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1926,7 +1943,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index ff8e6ae1c6362..f9c5906a8afa8 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ -- GitLab From 6c292c2f902760fcd82cbf8609ca766ea89fe34e Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:57 +0800 Subject: [PATCH 0232/1418] MIPS: reserve exception vector space ONLY ONCE [ Upstream commit abcabb9e30a1f9a69c76776f8abffc31c377b542 ] "cpu_probe" is called both by BP and APs, but reserving exception vector (like 0x0-0x1000) called by "cpu_probe" need once and calling on APs is too late since memblock is unavailable at that time. So, reserve exception vector ONLY by BP. Suggested-by: Thomas Bogendoerfer Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 246c6a6b02614..5b778995d4483 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) -- GitLab From a600d7f0c1d0872131186a42f30e77700fef1e14 Mon Sep 17 00:00:00 2001 From: Phoenix Chen Date: Fri, 26 Jan 2024 17:53:08 +0800 Subject: [PATCH 0233/1418] platform/x86: touchscreen_dmi: Add info for the TECLAST X16 Plus tablet [ Upstream commit 1abdf288b0ef5606f76b6e191fa6df05330e3d7e ] Add touch screen info for TECLAST X16 Plus tablet. Signed-off-by: Phoenix Chen Link: https://lore.kernel.org/r/20240126095308.5042-1-asbeltogf@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 9a92d515abb9b..50ec19188a20d 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -913,6 +913,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1567,6 +1593,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "E5A6_A1"), }, }, + { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, -- GitLab From 8b40eb2e716b503f7a4e1090815a17b1341b2150 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:37 +0800 Subject: [PATCH 0234/1418] ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt [ Upstream commit 993bf0f4c393b3667830918f9247438a8f6fdb5b ] Determine if bb_fragments is 0 instead of determining bb_free to eliminate the risk of dividing by zero when the block bitmap is corrupted. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-6-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1a310ee7d9e55..296185cbd1547 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,7 +831,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, -- GitLab From f97e75fa4e12b0aa0224e83fcbda8853ac2adf36 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:38 +0800 Subject: [PATCH 0235/1418] ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found() [ Upstream commit 4530b3660d396a646aad91a787b6ab37cf604b53 ] Determine if the group block bitmap is corrupted before using ac_b_ex in ext4_mb_try_best_found() to avoid allocating blocks from a group with a corrupted block bitmap in the following concurrency and making the situation worse. ext4_mb_regular_allocator ext4_lock_group(sb, group) ext4_mb_good_group // check if the group bbitmap is corrupted ext4_mb_complex_scan_group // Scan group gets ac_b_ex but doesn't use it ext4_unlock_group(sb, group) ext4_mark_group_bitmap_corrupted(group) // The block bitmap was corrupted during // the group unlock gap. ext4_mb_try_best_found ext4_lock_group(ac->ac_sb, group) ext4_mb_use_best_found mb_mark_used // Allocating blocks in block bitmap corrupted group Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 296185cbd1547..744472c0b6fa5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2176,6 +2176,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2183,6 +2186,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- GitLab From d639102f4cbd4cb65d1225dba3b9265596aab586 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:39 +0800 Subject: [PATCH 0236/1418] ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal() [ Upstream commit 832698373a25950942c04a512daa652c18a9b513 ] Places the logic for checking if the group's block bitmap is corrupt under the protection of the group lock to avoid allocating blocks from the group with a corrupted block bitmap. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-8-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 744472c0b6fa5..6a3e27771df73 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2215,12 +2215,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2253,6 +2251,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- GitLab From 3e746c4e4848914fa2d1ad5192cab5c589e6be13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 23 Dec 2023 15:16:50 +0100 Subject: [PATCH 0237/1418] Input: goodix - accept ACPI resources with gpio_count == 3 && gpio_int_idx == 0 [ Upstream commit 180a8f12c21f41740fee09ca7f7aa98ff5bb99f8 ] Some devices list 3 Gpio resources in the ACPI resource list for the touchscreen: 1. GpioInt resource pointing to the GPIO used for the interrupt 2. GpioIo resource pointing to the reset GPIO 3. GpioIo resource pointing to the GPIO used for the interrupt Note how the third extra GpioIo resource really is a duplicate of the GpioInt provided info. Ignore this extra GPIO, treating this setup the same as gpio_count == 2 && gpio_int_idx == 0 fixes the touchscreen not working on the Thunderbook Colossus W803 rugged tablet and likely also on the CyberBook_T116K. Reported-by: Maarten van der Schrieck Closes: https://gitlab.com/AdyaAdya/goodix-touchscreen-linux-driver/-/issues/22 Suggested-by: Maarten van der Schrieck Tested-by: Maarten van der Schrieck Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231223141650.10679-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 3f0732db7bf5b..6de64b3f900fb 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { -- GitLab From 9d508c897153ae8dd79303f7f035f078139f6b49 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:19:29 +0800 Subject: [PATCH 0238/1418] dmaengine: ti: edma: Add some null pointer checks to the edma_probe [ Upstream commit 6e2276203ac9ff10fc76917ec9813c660f627369 ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118031929.192192-1-chentao@kylinos.cn Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ti/edma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 7ec6e5d728b03..9212ac9f978f2 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2413,6 +2413,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2429,6 +2434,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { -- GitLab From 13c1af5f3bc4c1959f492fbb846aee30aa1fca28 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:26 +0100 Subject: [PATCH 0239/1418] regulator: pwm-regulator: Add validity checks in continuous .get_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c92688cac239794e4a1d976afa5203a4d3a2ac0e ] Continuous regulators can be configured to operate only in a certain duty cycle range (for example from 0..91%). Add a check to error out if the duty cycle translates to an unsupported (or out of range) voltage. Suggested-by: Uwe Kleine-König Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pwm-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index b9eeaff1c6615..925e486f73a6d 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. -- GitLab From fbd1cb2a9b77ec166eaf7931df150149116da7d1 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 26 Jan 2024 16:26:43 +0800 Subject: [PATCH 0240/1418] nvmet-tcp: fix nvme tcp ida memory leak [ Upstream commit 47c5dd66c1840524572dcdd956f4af2bdb6fbdff ] The nvmet_tcp_queue_ida should be destroy when the nvmet-tcp module exit. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ce42afe8f64ef..3480768274699 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1884,6 +1884,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); -- GitLab From 1675aae9e19e72c48e1c95338b894845944a855b Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:23 +0100 Subject: [PATCH 0241/1418] usb: ucsi_acpi: Quirk to ack a connector change ack cmd [ Upstream commit f3be347ea42dbb0358cd8b2d8dc543a23b70a976 ] The PPM on some Dell laptops seems to expect that the ACK_CC_CI command to clear the connector change notification is in turn followed by another ACK_CC_CI to acknowledge the ACK_CC_CI command itself. This is in violation of the UCSI spec that states: "The only notification that is not acknowledged by the OPM is the command completion notification for the ACK_CC_CI or the PPM_RESET command." Add a quirk to send this ack anyway. Apply the quirk to all Dell systems. On the first command that acks a connector change send a dummy command to determine if it runs into a timeout. Only activate the quirk if it does. This ensure that we do not break Dell systems that do not need the quirk. Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/ucsi/ucsi_acpi.c | 71 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 26171c5d3c61c..48130d636a020 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,8 @@ struct ucsi_acpi { unsigned long flags; guid_t guid; u64 cmd; + bool dell_quirk_probed; + bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -126,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops expect that an ACK command with the + * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) + * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. + * If this is not done events are not delivered to OSPM and + * subsequent commands will timeout. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val, ack = 0; + int ret; + + if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && + cmd & UCSI_ACK_CONNECTOR_CHANGE) + ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret != 0) + return ret; + if (ack == 0) + return ret; + + if (!ua->dell_quirk_probed) { + ua->dell_quirk_probed = true; + + cmd = UCSI_GET_CAPABILITY; + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, + sizeof(cmd)); + if (ret == 0) + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, + &ack, sizeof(ack)); + if (ret != -ETIMEDOUT) + return ret; + + ua->dell_quirk_active = true; + dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); + dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); + } + + if (!ua->dell_quirk_active) + return ret; + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -160,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -189,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) -- GitLab From c7bdaff0d07505e319cf89548bc115969e9fb816 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jan 2024 15:12:54 +0300 Subject: [PATCH 0242/1418] ALSA: usb-audio: Check presence of valid altsetting control [ Upstream commit 346f59d1e8ed0eed41c80e1acb657e484c308e6a ] Many devices with a single alternate setting do not have a Valid Alternate Setting Control and validation performed by validate_sample_rate_table_v2v3() doesn't work on them and is not really needed. So check the presense of control before sending altsetting validation requests. MOTU Microbook IIc is suffering the most without this check. It takes up to 40 seconds to bootup due to how slow it switches sampling rates: [ 2659.164824] usb 3-2: New USB device found, idVendor=07fd, idProduct=0004, bcdDevice= 0.60 [ 2659.164827] usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2659.164829] usb 3-2: Product: MicroBook IIc [ 2659.164830] usb 3-2: Manufacturer: MOTU [ 2659.166204] usb 3-2: Found last interface = 3 [ 2679.322298] usb 3-2: No valid sample rate available for 1:1, assuming a firmware bug [ 2679.322306] usb 3-2: 1:1: add audio endpoint 0x3 [ 2679.322321] usb 3-2: Creating new data endpoint #3 [ 2679.322552] usb 3-2: 1:1 Set sample rate 96000, clock 1 [ 2684.362250] usb 3-2: 2:1: cannot get freq (v2/v3): err -110 [ 2694.444700] usb 3-2: No valid sample rate available for 2:1, assuming a firmware bug [ 2694.444707] usb 3-2: 2:1: add audio endpoint 0x84 [ 2694.444721] usb 3-2: Creating new data endpoint #84 [ 2699.482103] usb 3-2: 2:1 Set sample rate 96000, clock 1 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240129121254.3454481-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/format.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f55b60..3b45d0ee76938 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; -- GitLab From 4dec3068eaa5b51693514d4f41ae3cc744baf4cf Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:43 +0800 Subject: [PATCH 0243/1418] ASoC: sunxi: sun4i-spdif: Add support for Allwinner H616 [ Upstream commit 0adf963b8463faa44653e22e56ce55f747e68868 ] The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Since the driver currently only supports the transmit function, support for the H616 is identical to what is currently done for the H6. Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-4-wens@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sunxi/sun4i-spdif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index bcceebca915ac..484b0e7c2defa 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -578,6 +578,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); -- GitLab From 949296ee62db11f8afb5ea414dd317a6b9a76712 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jan 2024 10:40:53 +0100 Subject: [PATCH 0244/1418] spi: sh-msiof: avoid integer overflow in constants [ Upstream commit 6500ad28fd5d67d5ca0fee9da73c463090842440 ] cppcheck rightfully warned: drivers/spi/spi-sh-msiof.c:792:28: warning: Signed integer overflow for expression '7<<29'. [integerOverflow] sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1); Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://msgid.link/r/20240130094053.10672-1-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-sh-msiof.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 51ceaa4857249..ec3a4939ee984 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -137,14 +137,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) -- GitLab From 1c57e5ef85c736637c36ffd44c39fa6e9c7ea4ba Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 30 Jan 2024 13:34:16 -0800 Subject: [PATCH 0245/1418] Input: xpad - add Lenovo Legion Go controllers [ Upstream commit 80441f76ee67002437db61f3b317ed80cce085d2 ] The Lenovo Legion Go is a handheld gaming system, similar to a Steam Deck. It has a gamepad (including rear paddles), 3 gyroscopes, a trackpad, volume buttons, a power button, and 2 LED ring lights. The Legion Go firmware presents these controls as a USB hub with various devices attached. In its default state, the gamepad is presented as an Xbox controller connected to this hub. (By holding a combination of buttons, it can be changed to use the older DirectInput API.) This patch teaches the existing Xbox controller module `xpad` to bind to the controller in the Legion Go, which enables support for the: - directional pad, - analog sticks (including clicks), - X, Y, A, B, - start and select (or menu and capture), - shoulder buttons, and - rumble. The trackpad, touchscreen, volume controls, and power button are already supported via existing kernel modules. Two of the face buttons, the gyroscopes, rear paddles, and LEDs are not. After this patch lands, the Legion Go will be mostly functional in Linux, out-of-the-box. The various components of the USB hub can be synthesized into a single logical controller (including the additional buttons) in userspace with [Handheld Daemon](https://github.com/hhd-dev/hhd), which makes the Go fully functional. Signed-off-by: Brenton Simpson Link: https://lore.kernel.org/r/20240118183546.418064-1-appsforartists@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e8011d70d0799..02f3bc4e4895e 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -489,6 +490,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */ -- GitLab From 17fe3616d854de8f4f0c29bcdb5de100a68bddca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Jan 2024 15:24:10 +0000 Subject: [PATCH 0246/1418] misc: open-dice: Fix spurious lockdep warning [ Upstream commit ac9762a74c7ca7cbfcb4c65f5871373653a046ac ] When probing the open-dice driver with PROVE_LOCKING=y, lockdep complains that the mutex in 'drvdata->lock' has a non-static key: | INFO: trying to register non-static key. | The code is fine but needs lockdep annotation, or maybe | you didn't initialize this object before use? | turning off the locking correctness validator. Fix the problem by initialising the mutex memory with mutex_init() instead of __MUTEX_INITIALIZER(). Cc: Arnd Bergmann Cc: David Brazdil Cc: Greg Kroah-Hartman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20240126152410.10148-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/open-dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index c61be3404c6f2..504b836a7abf8 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -142,7 +142,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -152,6 +151,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); -- GitLab From 41b256f473ac0cd5b6a4973f4cd75c6103661ab5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Jan 2024 17:29:46 -0500 Subject: [PATCH 0247/1418] netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new [ Upstream commit 6e348067ee4bc5905e35faa3a8fafa91c9124bc7 ] The annotation says in sctp_new(): "If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8)". However, it does not check SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the conntrack entry(ct). Because of that, if the ct in Router disappears for some reason in [1] with the packet sequence like below: Client > Server: sctp (1) [INIT] [init tag: 3201533963] Server > Client: sctp (1) [INIT ACK] [init tag: 972498433] Client > Server: sctp (1) [COOKIE ECHO] Server > Client: sctp (1) [COOKIE ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057809] Server > Client: sctp (1) [SACK] [cum ack 3075057809] Server > Client: sctp (1) [HB REQ] (the ct in Router disappears somehow) <-------- [1] Client > Server: sctp (1) [HB ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [ABORT] when processing HB ACK packet in Router it calls sctp_new() to initialize the new ct with vtag[REPLY] set to HB_ACK packet's vtag. Later when sending DATA from Client, all the SACKs from Server will get dropped in Router, as the SACK packet's vtag does not match vtag[REPLY] in the ct. The worst thing is the vtag in this ct will never get fixed by the upcoming packets from Server. This patch fixes it by checking SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the ct in sctp_new() as the annotation says. With this fix, it will leave vtag[REPLY] in ct to 0 in the case above, and the next HB REQ/ACK from Server is able to fix the vtag as its value is 0 in nf_conntrack_sctp_packet(). Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c94a9971d790c..7ffd698497f2a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -299,7 +299,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", -- GitLab From bead6ff98689091b89591a221035d1fe90cf9cdb Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Tue, 16 Jan 2024 11:00:00 -0500 Subject: [PATCH 0248/1418] drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz [ Upstream commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a ] [why] Originally, PMFW said min FCLK is 300Mhz, but min DCFCLK can be increased to 400Mhz because min FCLK is now 600Mhz so FCLK >= 1.5 * DCFCLK hardware requirement will still be satisfied. Increasing min DCFCLK addresses underflow issues (underflow occurs when phantom pipe is turned on for some Sub-Viewport configs). [how] Increasing DCFCLK by raising the min_dcfclk_mhz Reviewed-by: Chaitanya Dhere Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 85e0d1c2a9085..baecc0ffe7580 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2123,7 +2123,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; -- GitLab From 5653a6d65a11fd41afa79977aaab7f3aa7b6fbad Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:21 +0000 Subject: [PATCH 0249/1418] ASoC: wm_adsp: Don't overwrite fwf_name with the default [ Upstream commit daf3f0f99cde93a066240462b7a87cdfeedc04c0 ] There's no need to overwrite fwf_name with a kstrdup() of the cs_dsp part name. It is trivial to select either fwf_name or cs_dsp.part as the string to use when building the filename in wm_adsp_request_firmware_file(). This leaves fwf_name entirely owned by the codec driver. It also avoids problems with freeing the pointer. With the original code fwf_name was either a pointer owned by the codec driver, or a kstrdup() created by wm_adsp. This meant wm_adsp must free it if it set it, but not if the codec driver set it. The code was handling this by using devm_kstrdup(). But there is no absolute requirement that wm_adsp_common_init() must be called from probe(), so this was a pseudo-memory leak - each new call to wm_adsp_common_init() would allocate another block of memory but these would only be freed if the owning codec driver was removed. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240129162737.497-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 2cfca78f0401f..47a4c363227cc 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -740,19 +740,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -842,29 +848,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } -- GitLab From dee697ac5330e36f050b4ed0d6f520b8738fe12a Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 1 Feb 2024 14:53:08 +0300 Subject: [PATCH 0250/1418] ALSA: usb-audio: Ignore clock selector errors for single connection [ Upstream commit eaa1b01fe709d6a236a9cec74813e0400601fd23 ] For devices with multiple clock sources connected to a selector, we need to check what a clock selector control request has returned. This is needed to ensure that a requested clock source is indeed selected and for autoclock feature to work. For devices with single clock source connected, if we get an error there is nothing else we can do about it. We can't skip clock selector setup as it is required by some devices. So lets just ignore error in this case. This should fix various buggy Mackie devices: [ 649.109785] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.111946] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.113822] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) There is also interesting info from the Windows documentation [1] (this is probably why manufacturers dont't even test this feature): "The USB Audio 2.0 driver doesn't support clock selection. The driver uses the Clock Source Entity, which is selected by default and never issues a Clock Selector Control SET CUR request." Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/usb-2-0-audio-drivers [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=217314 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218175 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218342 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240201115308.17838-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/clock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e65566..a676ad093d189 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -328,8 +328,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) -- GitLab From 085195aa90a924c79e35569bcdad860d764a8e17 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:01 +0100 Subject: [PATCH 0251/1418] nvme-fc: do not wait in vain when unloading module [ Upstream commit 70fbfc47a392b98e5f8dba70c6efc6839205c982 ] The module exit path has race between deleting all controllers and freeing 'left over IDs'. To prevent double free a synchronization between nvme_delete_ctrl and ida_destroy has been added by the initial commit. There is some logic around trying to prevent from hanging forever in wait_for_completion, though it does not handling all cases. E.g. blktests is able to reproduce the situation where the module unload hangs forever. If we completely rely on the cleanup code executed from the nvme_delete_ctrl path, all IDs will be freed eventually. This makes calling ida_destroy unnecessary. We only have to ensure that all nvme_delete_ctrl code has been executed before we leave nvme_fc_exit_module. This is done by flushing the nvme_delete_wq workqueue. While at it, remove the unused nvme_fc_wq workqueue too. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 47 ++++++------------------------------------ 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 177a365b8ec55..3dbf926fd99fd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3869,10 +3862,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3890,7 +3879,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3914,8 +3903,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3935,45 +3922,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); -- GitLab From 16b2b31ba886694d9fc06950a1a230adc4357350 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:02 +0100 Subject: [PATCH 0252/1418] nvmet-fcloop: swap the list_add_tail arguments [ Upstream commit dcfad4ab4d6733f2861cd241d8532a0004fc835a ] The first argument of list_add_tail function is the new element which should be added to the list which is the second argument. Swap the arguments to allow processing more than one element at a time. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fcloop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c780af36c1d4a..f5b8442b653db 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; -- GitLab From 6319ab29d5e01c7a50e9a6503e0555b834905fc8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:03 +0100 Subject: [PATCH 0253/1418] nvmet-fc: release reference on target port [ Upstream commit c691e6d7e13dab81ac8c7489c83b5dea972522a5 ] In case we return early out of __nvmet_fc_finish_ls_req() we still have to release the reference on the target port. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1ab6601fdd5cf..0075d9636b065 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -359,7 +359,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_puttgtport; } list_del(&lsop->lsreq_list); @@ -372,6 +372,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); +out_puttgtport: nvmet_fc_tgtport_put(tgtport); } -- GitLab From b8338116689a5b541738a8bb89457e0c6c4d497e Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:04 +0100 Subject: [PATCH 0254/1418] nvmet-fc: defer cleanup using RCU properly [ Upstream commit 4049dc96b8de7aeb3addcea039446e464726a525 ] When the target executes a disconnect and the host triggers a reconnect immediately, the reconnect command still finds an existing association. The reconnect crashes later on because nvmet_fc_delete_target_assoc blindly removes resources while the reconnect code wants to use it. To address this, nvmet_fc_find_target_assoc should not be able to lookup an association which is being removed. The association list is already under RCU lifetime management, so let's properly use it and remove the association from the list and wait for a grace period before cleaning up all. This means we also can drop the RCU management on the queues, because this is now handled via the association itself. A second step split the execution context so that the initial disconnect command can complete without running the reconnect code in the same context. As usual, this is done by deferring the ->done to a workqueue. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 83 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 0075d9636b065..c9ef642313c8f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -165,7 +165,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -802,14 +802,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -831,15 +828,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -852,12 +847,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -969,7 +960,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1172,13 +1163,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1208,7 +1204,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1217,29 +1213,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); - for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } + synchronize_rcu(); - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + /* ensure all in-flight I/Os have been processed */ + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1492,9 +1480,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1547,9 +1534,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1581,7 +1567,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1593,9 +1579,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1625,6 +1610,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1870,9 +1857,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1887,8 +1871,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1904,6 +1886,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2902,6 +2887,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2933,6 +2921,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); -- GitLab From 67e2ddf2324ad55a555cdfb21802c8edce250372 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:06 +0100 Subject: [PATCH 0255/1418] nvmet-fc: hold reference on hostport match [ Upstream commit ca121a0f7515591dba0eb5532bfa7ace4dc153ce ] The hostport data structure is shared between the association, this why we keep track of the users via a refcount. So we should not decrement the refcount on a match and free the hostport several times. Reported by KASAN. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index c9ef642313c8f..64c26b703860c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1069,8 +1069,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; -- GitLab From 8b9e4539493b4e3f3a2de7ccdf9e75fc962622d9 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:09 +0100 Subject: [PATCH 0256/1418] nvmet-fc: abort command when there is no binding [ Upstream commit 3146345c2e9c2f661527054e402b0cfad80105a4 ] When the target port has not active port binding, there is no point in trying to process the command as it has to fail anyway. Instead adding checks to all commands abort the command early. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 64c26b703860c..b4b2631eb530e 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1101,6 +1101,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -2523,8 +2526,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); -- GitLab From 9e6987f8937a7bd7516aa52f25cb7e12c0c92ee8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:10 +0100 Subject: [PATCH 0257/1418] nvmet-fc: avoid deadlock on delete association path [ Upstream commit 710c69dbaccdac312e32931abcb8499c1525d397 ] When deleting an association the shutdown path is deadlocking because we try to flush the nvmet_wq nested. Avoid this by deadlock by deferring the put work into its own work item. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b4b2631eb530e..36cae038eb045 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -248,6 +250,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -359,7 +368,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - goto out_puttgtport; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -372,8 +381,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); -out_puttgtport: - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -1404,6 +1413,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { -- GitLab From f9eef0e495159b83e46bddd9e2409ad81b2f5f96 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:11 +0100 Subject: [PATCH 0258/1418] nvmet-fc: take ref count on tgtport before delete assoc [ Upstream commit fe506a74589326183297d5abdda02d0c76ae5a8b ] We have to ensure that the tgtport is not going away before be have remove all the associations. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 36cae038eb045..8a02ed63b1566 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1092,13 +1092,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1132,7 +1147,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1491,7 +1506,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); @@ -1545,7 +1560,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1590,7 +1605,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return; } @@ -1897,7 +1912,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return false; -- GitLab From be36276cb88b361c14472dbd74805b786cf1725c Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 29 Jan 2024 21:04:44 -0300 Subject: [PATCH 0259/1418] smb: client: increase number of PDUs allowed in a compound request [ Upstream commit 11d4d1dba3315f73d2d1d386f5bf4811a8241d45 ] With the introduction of SMB2_OP_QUERY_WSL_EA, the client may now send 5 commands in a single compound request in order to query xattrs from potential WSL reparse points, which should be fine as we currently allow up to 5 PDUs in a single compound request. However, if encryption is enabled (e.g. 'seal' mount option) or enforced by the server, current MAX_COMPOUND(5) won't be enough as we require an extra PDU for the transform header. Fix this by increasing MAX_COMPOUND to 7 and, while we're at it, add an WARN_ON_ONCE() and return -EIO instead of -ENOMEM in case we attempt to send a compound request that couldn't include the extra transform header. Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/transport.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7f1aea4c11b9c..58bb54994e22a 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -86,7 +86,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 97bf46de8e429..df44acaec9ae9 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -456,8 +456,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); -- GitLab From 200627f46e0335cbef76ef52ce5cf9f1e9f3b135 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:02 +0800 Subject: [PATCH 0260/1418] ext4: correct the hole length returned by ext4_map_blocks() [ Upstream commit 6430dea07e85958fa87d0276c0c4388dd51e630b ] In ext4_map_blocks(), if we can't find a range of mapping in the extents cache, we are calling ext4_ext_map_blocks() to search the real path and ext4_ext_determine_hole() to determine the hole range. But if the querying range was partially or completely overlaped by a delalloc extent, we can't find it in the real extent path, so the returned hole length could be incorrect. Fortunately, ext4_ext_put_gap_in_cache() have already handle delalloc extent, but it searches start from the expanded hole_start, doesn't start from the querying range, so the delalloc extent found could not be the one that overlaped the querying range, plus, it also didn't adjust the hole length. Let's just remove ext4_ext_put_gap_in_cache(), handle delalloc and insert adjusted hole extent in ext4_ext_determine_hole(). Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-4-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/extents.c | 111 +++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 41 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aa5aadd70bbc2..67af684e44e6e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, return len; } -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - /* * ext4_ext_rm_idx: * removes index from the index block. @@ -4064,6 +4040,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4181,22 +4220,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } -- GitLab From b3a996b106948ec2f347090b2550adfe0e6d3d4d Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Fri, 2 Feb 2024 10:28:59 -0800 Subject: [PATCH 0261/1418] Input: i8042 - add Fujitsu Lifebook U728 to i8042 quirk table [ Upstream commit 4255447ad34c5c3785fcdcf76cfa0271d6e5ed39 ] Another Fujitsu-related patch. In the initial boot stage the integrated keyboard of Fujitsu Lifebook U728 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:3092) not working at all. So this notebook uses a hid-over-i2c touchpad which is managed by the i2c_designware input driver. Since you can't find a PS/2 mouse port on this computer and you can't connect a PS/2 mouse to it even with an official port replicator I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20240103014717.127307-2-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index cd45a65e17f2c..dfc6c581873b7 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, + { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { -- GitLab From 9c66843606921d4f4a6f6c3275cded2963f36051 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:17:07 +0300 Subject: [PATCH 0262/1418] fs/ntfs3: Modified fix directory element type detection [ Upstream commit 22457c047ed971f2f2e33be593ddfabd9639a409 ] Unfortunately reparse attribute is used for many purposes (several dozens). It is not possible here to know is this name symlink or not. To get exactly the type of name we should to open inode (read mft). getattr for opened file (fstat) correctly returns symlink. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index d4d9f4ffb6d9a..c2fb76bb28f47 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + } return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } -- GitLab From 1970b5f2048f646d61327cd9685698b96ba31941 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:24:33 +0300 Subject: [PATCH 0263/1418] fs/ntfs3: Improve ntfs_dir_count [ Upstream commit 6a799c928b78b14999b7705c4cca0f88e297fe96 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index c2fb76bb28f47..72cdfa8727d3c 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -515,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; if (is_empty) *is_empty = true; @@ -563,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; } - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_used_bit(&ni->dir, ni, &bit); @@ -573,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out; - vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -584,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; } out: -- GitLab From 25d1694d6e34321d3dacccafe85f6b15719a5c62 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:26:31 +0300 Subject: [PATCH 0264/1418] fs/ntfs3: Correct hard links updating when dealing with DOS names [ Upstream commit 1918c10e137eae266b8eb0ab1cc14421dcb0e3e2 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/record.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ba336c7280b85..ab03c373cec66 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -491,8 +491,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false; if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } } used -= asize; -- GitLab From a9f7d7656fbcea654a3db7860033cf2667730c00 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:34:24 +0300 Subject: [PATCH 0265/1418] fs/ntfs3: Print warning while fixing hard links count [ Upstream commit 85ba2a75faee759809a7e43b4c103ac59bac1026 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index dc937089a464a..42dd9fdaf4151 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -402,7 +402,6 @@ end_enum: goto out; if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -417,6 +416,7 @@ end_enum: if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; } -- GitLab From 95bad562e575d6bbe623cfd234946573b0a48812 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:47:30 +0300 Subject: [PATCH 0266/1418] fs/ntfs3: Fix detected field-spanning write (size 8) of single field "le->name" [ Upstream commit d155617006ebc172a80d3eb013c4b867f9a8ada4 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 0f38d558169a1..8b580515b1d6e 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -517,7 +517,7 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. }; // sizeof(0x20) -- GitLab From 50545eb6cd5f7ff852a01fa29b7372524ef948cc Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 28 Nov 2023 11:09:34 +0300 Subject: [PATCH 0267/1418] fs/ntfs3: Add NULL ptr dereference checking at the end of attr_allocate_frame() [ Upstream commit aaab47f204aaf47838241d57bf8662c8840de60a ] It is preferable to exit through the out: label because internal debugging functions are located there. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrib.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 2215179c925b3..2618bf5a37892 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1658,8 +1658,10 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + } attr = attr_b; le = le_b; @@ -1740,13 +1742,15 @@ ins_ext: ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size; - valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } } return err; -- GitLab From ee12c3102027ba3c1ea08ca292dc9d7ccf7c21e4 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 21 Dec 2023 13:59:43 +0300 Subject: [PATCH 0268/1418] fs/ntfs3: Disable ATTR_LIST_ENTRY size check [ Upstream commit 4cdfb6e7bc9c80142d33bf1d4653a73fa678ba56 ] The use of sizeof(struct ATTR_LIST_ENTRY) has been replaced with le_size(0) due to alignment peculiarities on different platforms. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312071005.g6YrbaIe-lkp@intel.com/ Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrlist.c | 8 ++++---- fs/ntfs3/ntfs.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 0c6a68e71e7d4..723e49ec83ce7 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0); if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, /* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size); /* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 8b580515b1d6e..ba26a465b3091 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -521,8 +521,6 @@ struct ATTR_LIST_ENTRY { }; // sizeof(0x20) -static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + -- GitLab From 1c005ce9934c0b784b8123e2dee7aa2dbf7ebce2 Mon Sep 17 00:00:00 2001 From: Ism Hong Date: Tue, 26 Dec 2023 16:51:41 +0800 Subject: [PATCH 0269/1418] fs/ntfs3: use non-movable memory for ntfs3 MFT buffer cache [ Upstream commit d6d33f03baa43d763fe094ca926eeae7d3421d07 ] Since the buffer cache for ntfs3 metadata is not released until the file system is unmounted, allocating from the movable zone may result in cma allocation failures. This is due to the page still being used by ntfs3, leading to migration failures. To address this, this commit use sb_bread_umovable() instead of sb_bread(). This change prevents allocation from the movable zone, ensuring compatibility with scenarios where the buffer head is not released until unmount. This patch is inspired by commit a8ac900b8163("ext4: use non-movable memory for the ext4 superblock"). The issue is found when playing video files stored in NTFS on the Android TV platform. During this process, the media parser reads the video file, causing ntfs3 to allocate buffer cache from the CMA area. Subsequently, the hardware decoder attempts to allocate memory from the same CMA area. However, the page is still in use by ntfs3, resulting in a migrate failure in alloc_contig_range(). The pinned page and allocating stacktrace reported by page owner shows below: page:ffffffff00b68880 refcount:3 mapcount:0 mapping:ffffff80046aa828 index:0xc0040 pfn:0x20fa4 aops:def_blk_aops ino:0 flags: 0x2020(active|private) page dumped because: migration failure page last allocated via order 0, migratetype Movable, gfp_mask 0x108c48 (GFP_NOFS|__GFP_NOFAIL|__GFP_HARDWALL|__GFP_MOVABLE), page_owner tracks the page as allocated prep_new_page get_page_from_freelist __alloc_pages_nodemask pagecache_get_page __getblk_gfp __bread_gfp ntfs_read_run_nb ntfs_read_bh mi_read ntfs_iget5 dir_search_u ntfs_lookup __lookup_slow lookup_slow walk_component path_lookupat Signed-off-by: Ism Hong Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 74482ef569ab7..c9ba0d27601dc 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1015,7 +1015,7 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) static inline struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { - struct buffer_head *bh = sb_bread(sb, block); + struct buffer_head *bh = sb_bread_unmovable(sb, block); if (bh) return bh; -- GitLab From 0d2f804b9f5424d1970403d7bfcf1f22b7853e25 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:03:21 +0300 Subject: [PATCH 0270/1418] fs/ntfs3: Prevent generic message "attempt to access beyond end of device" [ Upstream commit 5ca87d01eba7bdfe9536a157ca33c1455bb8d16c ] It used in test environment. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fsntfs.c | 24 ++++++++++++++++++++++++ fs/ntfs3/ntfs_fs.h | 14 +------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 4b72bc7f12ca3..1eac80d55b554 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -976,6 +976,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); } +/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index c9ba0d27601dc..0f9bec29f2b70 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -580,6 +580,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized); /* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1012,19 +1013,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } -static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread_unmovable(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode); -- GitLab From 32a3974b26dfbb059d15ab8d5683d8b9de647468 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:13:59 +0300 Subject: [PATCH 0271/1418] fs/ntfs3: Correct function is_rst_area_valid [ Upstream commit 1b7dd28e14c4728ae1a815605ca33ffb4ce1b309 ] Reported-by: Robert Morris Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fslog.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 710cb5aa5a65b..d53ef128fa733 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size); @@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size); - file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; } + if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8)) -- GitLab From 5d67a4ff3dfe19aceb185d4c3912796e2d0ac4e1 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 29 Jan 2024 10:30:09 +0300 Subject: [PATCH 0272/1418] fs/ntfs3: Update inode->i_size after success write into compressed file [ Upstream commit d68968440b1a75dee05cfac7f368f1aa139e1911 ] Reported-by: Giovanni Santini Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index f31c0389a2e7d..14efe46df91ef 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1110,6 +1110,8 @@ out: iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos); return written; } -- GitLab From 6ed6cdbe88334ca3430c5aee7754dc4597498dfb Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Sat, 30 Dec 2023 17:00:03 +0800 Subject: [PATCH 0273/1418] fs/ntfs3: Fix oob in ntfs_listxattr [ Upstream commit 731ab1f9828800df871c5a7ab9ffe965317d3f15 ] The length of name cannot exceed the space occupied by ea. Reported-and-tested-by: syzbot+65e940cfb8f99a97aca7@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index df15e00c2a3a0..d98cf7b382bcc 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -217,6 +217,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break; + if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size) -- GitLab From 4e5bd2287021823c10862379312648dd55e17f49 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 15:53:55 +0100 Subject: [PATCH 0274/1418] wifi: mac80211: set station RX-NSS on reconfig [ Upstream commit dd6c064cfc3fc18d871107c6f5db8837e88572e4 ] When a station is added/reconfigured by userspace, e.g. a TDLS peer or a SoftAP client STA, rx_nss is currently not always set, so that it might be left zero. Set it up properly. Link: https://msgid.link/20240129155354.98f148a3d654.I193a02155f557ea54dc9d0232da66cf96734119a@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a2c4866080bd7..6cf0b77839d1d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1775,6 +1775,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } -- GitLab From 2bf17c3e13aab5e1e867508b2db85927810f9f97 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:48:23 +0100 Subject: [PATCH 0275/1418] wifi: mac80211: adding missing drv_mgd_complete_tx() call [ Upstream commit c042600c17d8c490279f0ae2baee29475fe8047d ] There's a call to drv_mgd_prepare_tx() and so there should be one to drv_mgd_complete_tx(), but on this path it's not. Add it. Link: https://msgid.link/20240131164824.2f0922a514e1.I5aac89b93bcead88c374187d70cad0599d29d2c8@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c6f0da028a2a4..f25dc6931a5b1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7294,6 +7294,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } -- GitLab From 700c3f642c32721f246e09d3a9511acf40ae42be Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:03 -0800 Subject: [PATCH 0276/1418] efi: runtime: Fix potential overflow of soft-reserved region size [ Upstream commit de1034b38a346ef6be25fe8792f5d1e0684d5ff4 ] md_size will have been narrowed if we have >= 4GB worth of pages in a soft-reserved region. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/riscv-runtime.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 7c48c380d722c..1995f0a2e0fc0 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index d0daacd2c903f..6b142aa35389e 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) -- GitLab From 48a33c312526857dc8850096cc31c53b2d6b7e27 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:04 -0800 Subject: [PATCH 0277/1418] efi: Don't add memblocks for soft-reserved memory [ Upstream commit 0bcff59ef7a652fcdc6d535554b63278c2406c8f ] Adding memblocks for soft-reserved regions prevents them from later being hotplugged in by dax_kmem. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi-init.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2fd770b499a35..ff9791ce2e156 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -116,15 +116,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use @@ -169,6 +160,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT; if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size); if (!is_usable_memory(md)) -- GitLab From 76ee44af09755afaa24964bb639730f66685657e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:36 +0800 Subject: [PATCH 0278/1418] hwmon: (coretemp) Enlarge per package core count limit [ Upstream commit 34cf8c657cf0365791cdc658ddbca9cc907726ce ] Currently, coretemp driver supports only 128 cores per package. This loses some core temperature information on systems that have more than 128 cores per package. [ 58.685033] coretemp coretemp.0: Adding Core 128 failed [ 58.692009] coretemp coretemp.0: Adding Core 129 failed ... Enlarge the limitation to 512 because there are platforms with more than 256 cores per package. Signed-off-by: Zhang Rui Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 59344ad62822d..c0aa6bfa66b24 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -40,7 +40,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) -- GitLab From 75745f2b7453f21df39d913ec1e6cfee7887c88d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 20 Dec 2023 17:26:58 +0100 Subject: [PATCH 0279/1418] scsi: lpfc: Use unsigned type for num_sge [ Upstream commit d6c1b19153f92e95e5e1801d540e98771053afae ] LUNs going into "failed ready running" state observed on >1T and on even numbers of size (2T, 4T, 6T, 8T and 10T). The issue occurs when DIF is enabled at the host. The kernel logs: Cannot setup S/G List for HBAIO segs 1/1 SGL 512 SCSI 256: 3 0 The host lpfc driver is failing to setup scatter/gather list (protection data) for the I/Os. The return type lpfc_bg_setup_sgl()/lpfc_bg_setup_sgl_prot() causes the compiler to remove the most significant bit. Use an unsigned type instead. Signed-off-by: Hannes Reinecke [dwagner: added commit message] Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20231220162658.12392-1-dwagner@suse.de Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 7aac9fc719675..0bb7e164b525f 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1919,7 +1919,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1927,8 +1927,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2100,7 +2100,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2124,8 +2124,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL; sgpe = scsi_prot_sglist(sc); -- GitLab From c7ac9c1f7f934df56fe1c1bb5ac970d543d4dc09 Mon Sep 17 00:00:00 2001 From: SEO HOYOUNG Date: Mon, 22 Jan 2024 17:33:24 +0900 Subject: [PATCH 0280/1418] scsi: ufs: core: Remove the ufshcd_release() in ufshcd_err_handling_prepare() [ Upstream commit 17e94b2585417e04dabc2f13bc03b4665ae687f3 ] If ufshcd_err_handler() is called in a suspend/resume situation, ufs_release() can be called twice and active_reqs end up going negative. This is because ufshcd_err_handling_prepare() and ufshcd_err_handling_unprepare() both call ufshcd_release(). Remove superfluous call to ufshcd_release(). Signed-off-by: SEO HOYOUNG Link: https://lore.kernel.org/r/20240122083324.11797-1-hy50.seo@samsung.com Reviewed-by: Bart Van Assche Reviewed-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 9fd4e9ed93b8b..f3c25467e571f 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6159,7 +6159,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_hold(hba, false); if (!ufshcd_is_clkgating_allowed(hba)) ufshcd_setup_clocks(hba, true); - ufshcd_release(hba); pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; ufshcd_vops_resume(hba, pm_op); } else { -- GitLab From c920f604e0c896aab911efb5913ce89f74b9b69c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0281/1418] LoongArch: Select ARCH_ENABLE_THP_MIGRATION instead of redefining it [ Upstream commit b3ff2d9c3a9c64cd0a011cdd407ffc38a6ea8788 ] ARCH_ENABLE_THP_MIGRATION is supposed to be selected by arch Kconfig. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e737dc8cd660c..b1b4396dbac6c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -9,6 +9,7 @@ config LOONGARCH select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL @@ -495,10 +496,6 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG -- GitLab From c0b07b4237e42cf952dc72d0f459e86fc3e2e121 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0282/1418] LoongArch: Select HAVE_ARCH_SECCOMP to use the common SECCOMP menu [ Upstream commit 6b79ecd084c99b31c8b4d0beda08893716d5558e ] LoongArch missed the refactoring made by commit 282a181b1a0d ("seccomp: Move config option SECCOMP to arch/Kconfig") because LoongArch was not mainlined at that time. The 'depends on PROC_FS' statement is stale as described in that commit. Select HAVE_ARCH_SECCOMP, and remove the duplicated config entry. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Kconfig | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b1b4396dbac6c..fa3171f563274 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -81,6 +81,7 @@ config LOONGARCH select GPIOLIB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -462,23 +463,6 @@ config PHYSICAL_START specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed kernel). -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu config ARCH_SELECT_MEMORY_MODEL -- GitLab From 6b82ffe7a265b832b40fd31ee5503170cb3407e2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 7 Feb 2024 08:01:17 +0900 Subject: [PATCH 0283/1418] firewire: core: send bus reset promptly on gap count error [ Upstream commit 7ed4380009e96d9e9c605e12822e987b35b05648 ] If we are bus manager and the bus has inconsistent gap counts, send a bus reset immediately instead of trying to read the root node's config ROM first. Otherwise, we could spend a lot of time trying to read the config ROM but never succeeding. This eliminates a 50+ second delay before the FireWire bus is usable after a newly connected device is powered on in certain circumstances. The delay occurs if a gap count inconsistency occurs, we are not the root node, and we become bus manager. One scenario that causes this is with a TI XIO2213B OHCI, the first time a Sony DSR-25 is powered on after being connected to the FireWire cable. In this configuration, the Linux box will not receive the initial PHY configuration packet sent by the DSR-25 as IRM, resulting in the DSR-25 having a gap count of 44 while the Linux box has a gap count of 63. FireWire devices have a gap count parameter, which is set to 63 on power-up and can be changed with a PHY configuration packet. This determines the duration of the subaction and arbitration gaps. For reliable communication, all nodes on a FireWire bus must have the same gap count. A node may have zero or more of the following roles: root node, bus manager (BM), isochronous resource manager (IRM), and cycle master. Unless a root node was forced with a PHY configuration packet, any node might become root node after a bus reset. Only the root node can become cycle master. If the root node is not cycle master capable, the BM or IRM should force a change of root node. After a bus reset, each node sends a self-ID packet, which contains its current gap count. A single bus reset does not change the gap count, but two bus resets in a row will set the gap count to 63. Because a consistent gap count is required for reliable communication, IEEE 1394a-2000 requires that the bus manager generate a bus reset if it detects that the gap count is inconsistent. When the gap count is inconsistent, build_tree() will notice this after the self identification process. It will set card->gap_count to the invalid value 0. If we become bus master, this will force bm_work() to send a bus reset when it performs gap count optimization. After a bus reset, there is no bus manager. We will almost always try to become bus manager. Once we become bus manager, we will first determine whether the root node is cycle master capable. Then, we will determine if the gap count should be changed. If either the root node or the gap count should be changed, we will generate a bus reset. To determine if the root node is cycle master capable, we read its configuration ROM. bm_work() will wait until we have finished trying to read the configuration ROM. However, an inconsistent gap count can make this take a long time. read_config_rom() will read the first few quadlets from the config ROM. Due to the gap count inconsistency, eventually one of the reads will time out. When read_config_rom() fails, fw_device_init() calls it again until MAX_RETRIES is reached. This takes 50+ seconds. Once we give up trying to read the configuration ROM, bm_work() will wake up, assume that the root node is not cycle master capable, and do a bus reset. Hopefully, this will resolve the gap count inconsistency. This change makes bm_work() check for an inconsistent gap count before waiting for the root node's configuration ROM. If the gap count is inconsistent, bm_work() will immediately do a bus reset. This eliminates the 50+ second delay and rapidly brings the bus to a working state. I considered that if the gap count is inconsistent, a PHY configuration packet might not be successful, so it could be desirable to skip the PHY configuration packet before the bus reset in this case. However, IEEE 1394a-2000 and IEEE 1394-2008 say that the bus manager may transmit a PHY configuration packet before a bus reset when correcting a gap count error. Since the standard endorses this, I decided it's safe to retain the PHY configuration packet transmission. Normally, after a topology change, we will reset the bus a maximum of 5 times to change the root node and perform gap count optimization. However, if there is a gap count inconsistency, we must always generate a bus reset. Otherwise the gap count inconsistency will persist and communication will be unreliable. For that reason, if there is a gap count inconstency, we generate a bus reset even if we already reached the 5 reset limit. Signed-off-by: Adam Goldman Reference: https://sourceforge.net/p/linux1394/mailman/message/58727806/ Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/core-card.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 6ac5ff20a2fe2..8aaa7fcb2630d 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation; - if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root. -- GitLab From d56edd0f1b3013e0a452c53cb476b6b7b39210a1 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 16 Jan 2024 19:10:45 +0800 Subject: [PATCH 0284/1418] drm/amdgpu: skip to program GFXDEC registers for suspend abort [ Upstream commit 93bafa32a6918154aa0caf9f66679a32c2431357 ] In the suspend abort cases, the gfx power rail doesn't turn off so some GFXDEC registers/CSB can't reset to default value and at this moment reinitialize GFXDEC/CSB will result in an unexpected error. So let skip those program sequence for the suspend abort case. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c46c6fbd235e8..e636c7850f777 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -999,6 +999,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b9983ca99eb7d..f24c3a20e901d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2414,6 +2414,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2428,6 +2429,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 84ca601f7d5f3..195b298923543 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3064,6 +3064,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); -- GitLab From 8b661fb17bfad9e2b72ed713c86a52829f9b0473 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 17 Jan 2024 13:39:37 +0800 Subject: [PATCH 0285/1418] drm/amdgpu: reset gpu for s3 suspend abort case [ Upstream commit 6ef82ac664bb9568ca3956e0d9c9c478e25077ff ] In the s3 suspend abort case some type of gfx9 power rail not turn off from FCH side and this will put the GPU in an unknown power status, so let's reset the gpu to a known good power state before reinitialize gpu device. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 811dd3ea63620..489c89465c78b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1285,10 +1285,32 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } -- GitLab From 0947d0d463d4e6fad75f3a3066613cb3d9689b26 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 2 Feb 2024 12:38:24 -0300 Subject: [PATCH 0286/1418] smb: client: set correct d_type for reparse points under DFS mounts [ Upstream commit 55c7788c37242702868bfac7861cdf0c358d6c3d ] Send query dir requests with an info level of SMB_FIND_FILE_FULL_DIRECTORY_INFO rather than SMB_FIND_FILE_DIRECTORY_INFO when the client is generating its own inode numbers (e.g. noserverino) so that reparse tags still can be parsed directly from the responses, but server won't send UniqueId (server inode number) Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/readdir.c | 15 ++++++++------- fs/smb/client/smb2pdu.c | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 2d75ba5aaa8ad..5990bdbae598f 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -304,14 +304,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, } static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, - SEARCH_ID_FULL_DIR_INFO *info, + const void *info, struct cifs_sb_info *cifs_sb) { + const FILE_FULL_DIRECTORY_INFO *di = info; + __dir_info_to_fattr(fattr, info); - /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */ + /* See MS-FSCC 2.4.14, 2.4.19 */ if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_cifstag = le32_to_cpu(info->EaSize); + fattr->cf_cifstag = le32_to_cpu(di->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -425,7 +427,7 @@ ffirst_retry: } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { - cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; @@ -1019,10 +1021,9 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: case SMB_FIND_FILE_ID_FULL_DIR_INFO: - cifs_fulldir_info_to_fattr(&fattr, - (SEARCH_ID_FULL_DIR_INFO *)find_entry, - cifs_sb); + cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb); break; default: cifs_dir_info_to_fattr(&fattr, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c1fc1651d8b69..4c1231496a725 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5010,6 +5010,9 @@ int SMB2_query_directory_init(const unsigned int xid, case SMB_FIND_FILE_POSIX_INFO: req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO; break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION; + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", info_level); @@ -5079,6 +5082,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, /* note that posix payload are variable size */ info_buf_size = sizeof(struct smb2_posix_info); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO); + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", srch_inf->info_level); -- GitLab From db48acce75d73dfe51c43d56893cce067b73cf08 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 29 Jan 2024 16:52:50 +0800 Subject: [PATCH 0287/1418] virtio-blk: Ensure no requests in virtqueues before deleting vqs. [ Upstream commit 4ce6e2db00de8103a0687fb0f65fd17124a51aaa ] Ensure no remaining requests in virtqueues before resetting vdev and deleting virtqueues. Otherwise these requests will never be completed. It may cause the system to become unresponsive. Function blk_mq_quiesce_queue() can ensure that requests have become in_flight status, but it cannot guarantee that requests have been processed by the device. Virtqueues should never be deleted before all requests become complete status. Function blk_mq_freeze_queue() ensure that all requests in virtqueues become complete status. And no requests can enter in virtqueues. Signed-off-by: Yi Sun Reviewed-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20240129085250.1550594-1-yi.sun@unisoc.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3124837aa406f..505026f0025c7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1206,14 +1206,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -1231,7 +1232,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif -- GitLab From 4dd73641d7ac7273751a893c7445f9d2dbcec2cb Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 Feb 2024 23:57:18 -0600 Subject: [PATCH 0288/1418] smb3: clarify mount warning [ Upstream commit a5cc98eba2592d6e3c5a4351319595ddde2a5901 ] When a user tries to use the "sec=krb5p" mount parameter to encrypt data on connection to a server (when authenticating with Kerberos), we indicate that it is not supported, but do not note the equivalent recommended mount parameter ("sec=krb5,seal") which turns on encryption for that mount (and uses Kerberos for auth). Update the warning message. Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index f4818599c00a2..4d5302b58b534 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -209,7 +209,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: - cifs_errorf(fc, "sec=krb5p is not supported!\n"); + cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n"); return 1; case Opt_sec_krb5i: ctx->sign = true; -- GitLab From 339ddc983bc1622341d95f244c361cda3da3a4ff Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 25 Dec 2023 15:36:15 +0200 Subject: [PATCH 0289/1418] pmdomain: mediatek: fix race conditions with genpd [ Upstream commit c41336f4d69057cbf88fed47951379b384540df5 ] If the power domains are registered first with genpd and *after that* the driver attempts to power them on in the probe sequence, then it is possible that a race condition occurs if genpd tries to power them on in the same time. The same is valid for powering them off before unregistering them from genpd. Attempt to fix race conditions by first removing the domains from genpd and *after that* powering down domains. Also first power up the domains and *after that* register them to genpd. Fixes: 59b644b01cf4 ("soc: mediatek: Add MediaTek SCPSYS power domains") Signed-off-by: Eugen Hristev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231225133615.78993-1-eugen.hristev@collabora.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/soc/mediatek/mtk-pm-domains.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c index 474b272f9b02d..832adb570b501 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c @@ -499,6 +499,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren goto err_put_node; } + /* recursive call to add all subdomains */ + ret = scpsys_add_subdomain(scpsys, child); + if (ret) + goto err_put_node; + ret = pm_genpd_add_subdomain(parent_pd, child_pd); if (ret) { dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n", @@ -508,11 +513,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name, child_pd->name); } - - /* recursive call to add all subdomains */ - ret = scpsys_add_subdomain(scpsys, child); - if (ret) - goto err_put_node; } return 0; @@ -526,9 +526,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) { int ret; - if (scpsys_domain_is_on(pd)) - scpsys_power_off(&pd->genpd); - /* * We're in the error cleanup already, so we only complain, * but won't emit another error on top of the original one. @@ -538,6 +535,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) dev_err(pd->scpsys->dev, "failed to remove domain '%s' : %d - state may be inconsistent\n", pd->genpd.name, ret); + if (scpsys_domain_is_on(pd)) + scpsys_power_off(&pd->genpd); clk_bulk_put(pd->num_clks, pd->clks); clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks); -- GitLab From 4aa4ea70f37c18d5c2db347858327efea20ade18 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Jan 2024 17:33:55 +0100 Subject: [PATCH 0290/1418] pmdomain: renesas: r8a77980-sysc: CR7 must be always on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f0e4a1356466ec1858ae8e5c70bea2ce5e55008b ] The power domain containing the Cortex-R7 CPU core on the R-Car V3H SoC must always be in power-on state, unlike on other SoCs in the R-Car Gen3 family. See Table 9.4 "Power domains" in the R-Car Series, 3rd Generation Hardware User’s Manual Rev.1.00 and later. Fix this by marking the domain as a CPU domain without control registers, so the driver will not touch it. Fixes: 41d6d8bd8ae9 ("soc: renesas: rcar-sysc: add R8A77980 support") Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/fdad9a86132d53ecddf72b734dac406915c4edc0.1705076735.git.geert+renesas@glider.be Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/soc/renesas/r8a77980-sysc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/renesas/r8a77980-sysc.c b/drivers/soc/renesas/r8a77980-sysc.c index 39ca84a67daad..621e411fc9991 100644 --- a/drivers/soc/renesas/r8a77980-sysc.c +++ b/drivers/soc/renesas/r8a77980-sysc.c @@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = { PD_CPU_NOCR }, { "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU, PD_CPU_NOCR }, - { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON }, + { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON, + PD_CPU_NOCR }, { "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON }, { "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR }, { "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR }, -- GitLab From 52dc9a7a573dbf778625a0efca0fca55489f084b Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Thu, 1 Feb 2024 09:10:08 +0100 Subject: [PATCH 0291/1418] IB/hfi1: Fix sdma.h tx->num_descs off-by-one error commit e6f57c6881916df39db7d95981a8ad2b9c3458d6 upstream. Unfortunately the commit `fd8958efe877` introduced another error causing the `descs` array to overflow. This reults in further crashes easily reproducible by `sendmsg` system call. [ 1080.836473] general protection fault, probably for non-canonical address 0x400300015528b00a: 0000 [#1] PREEMPT SMP PTI [ 1080.869326] RIP: 0010:hfi1_ipoib_build_ib_tx_headers.constprop.0+0xe1/0x2b0 [hfi1] -- [ 1080.974535] Call Trace: [ 1080.976990] [ 1081.021929] hfi1_ipoib_send_dma_common+0x7a/0x2e0 [hfi1] [ 1081.027364] hfi1_ipoib_send_dma_list+0x62/0x270 [hfi1] [ 1081.032633] hfi1_ipoib_send+0x112/0x300 [hfi1] [ 1081.042001] ipoib_start_xmit+0x2a9/0x2d0 [ib_ipoib] [ 1081.046978] dev_hard_start_xmit+0xc4/0x210 -- [ 1081.148347] __sys_sendmsg+0x59/0xa0 crash> ipoib_txreq 0xffff9cfeba229f00 struct ipoib_txreq { txreq = { list = { next = 0xffff9cfeba229f00, prev = 0xffff9cfeba229f00 }, descp = 0xffff9cfeba229f40, coalesce_buf = 0x0, wait = 0xffff9cfea4e69a48, complete = 0xffffffffc0fe0760 , packet_len = 0x46d, tlen = 0x0, num_desc = 0x0, desc_limit = 0x6, next_descq_idx = 0x45c, coalesce_idx = 0x0, flags = 0x0, descs = {{ qw = {0x8024000120dffb00, 0x4} # SDMA_DESC0_FIRST_DESC_FLAG (bit 63) }, { qw = { 0x3800014231b108, 0x4} }, { qw = { 0x310000e4ee0fcf0, 0x8} }, { qw = { 0x3000012e9f8000, 0x8} }, { qw = { 0x59000dfb9d0000, 0x8} }, { qw = { 0x78000e02e40000, 0x8} }} }, sdma_hdr = 0x400300015528b000, <<< invalid pointer in the tx request structure sdma_status = 0x0, SDMA_DESC0_LAST_DESC_FLAG (bit 62) complete = 0x0, priv = 0x0, txq = 0xffff9cfea4e69880, skb = 0xffff9d099809f400 } If an SDMA send consists of exactly 6 descriptors and requires dword padding (in the 7th descriptor), the sdma_txreq descriptor array is not properly expanded and the packet will overflow into the container structure. This results in a panic when the send completion runs. The exact panic varies depending on what elements of the container structure get corrupted. The fix is to use the correct expression in _pad_sdma_tx_descs() to test the need to expand the descriptor array. With this patch the crashes are no longer reproducible and the machine is stable. Fixes: fd8958efe877 ("IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors") Cc: stable@vger.kernel.org Reported-by: Mats Kronberg Tested-by: Mats Kronberg Signed-off-by: Daniel Vacek Link: https://lore.kernel.org/r/20240201081009.1109442-1-neelx@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 26c62162759ba..969c5c3ab859e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); -- GitLab From a262b78dd085dbe9b3c75dc1d9c4cd102b110b53 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 0292/1418] LoongArch: Disable IRQ before init_fn() for nonboot CPUs commit 1001db6c42e4012b55e5ee19405490f23e033b5a upstream. Disable IRQ before init_fn() for nonboot CPUs when hotplug, in order to silence such warnings (and also avoid potential errors due to unexpected interrupts): WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0 a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000 a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000 t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001 s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001 s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8 ra: 90000000047bd56c tlb_init+0x24c/0x528 ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000000 (PPLV0 -PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071000 (LIE=12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140 9000000006456888 9000000006456880 900000010039f950 0000000000000001 0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003 0000000000040000 9000000007930370 0000000002b90000 0000000000000004 9000000006366000 900000000619a140 0000000000000000 0000000000000004 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940 9000000001ca55f8 9000000006366000 90000000047a4828 00007ffff057ded8 00000000000000b0 0000000000000000 0000000000000000 0000000000071000 ... Call Trace: [<90000000047a4828>] show_stack+0x48/0x1a0 [<9000000005b61874>] dump_stack_lvl+0x84/0xcc [<90000000047f60ac>] __warn+0x8c/0x1e0 [<9000000005b0ab34>] report_bug+0x1b4/0x280 [<9000000005b63110>] do_bp+0x2d0/0x480 [<90000000047a2e20>] handle_bp+0x120/0x1c0 [<90000000048e3334>] rcu_cpu_starting+0x214/0x280 [<90000000047bd568>] tlb_init+0x248/0x528 [<90000000047a4c44>] per_cpu_trap_init+0x124/0x160 [<90000000047a19f4>] cpu_probe+0x494/0xa00 [<90000000047b551c>] start_secondary+0x3c/0xc0 [<9000000005b66134>] smpboot_entry+0x50/0x58 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index e0404df2c952f..18a2b37f4aea3 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -297,6 +297,7 @@ void play_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); -- GitLab From 47647795a630e74a2a727dfc0fe362b30cbf8905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 21 Feb 2024 08:33:24 +0100 Subject: [PATCH 0293/1418] drm/ttm: Fix an invalid freeing on already freed page in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40510a941d27d405a82dc3320823d875f94625df upstream. If caching mode change fails due to, for example, OOM we free the allocated pages in a two-step process. First the pages for which the caching change has already succeeded. Secondly the pages for which a caching change did not succeed. However the second step was incorrectly freeing the pages already freed in the first step. Fix. Signed-off-by: Thomas Hellström Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path") Cc: Christian König Cc: Dave Airlie Cc: Christian Koenig Cc: Huang Rui Cc: dri-devel@lists.freedesktop.org Cc: # v6.4+ Reviewed-by: Matthew Auld Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240221073324.3303-1-thomas.hellstrom@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 86affe987a1cb..393b97b4a991f 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -383,7 +383,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; -- GitLab From 65c5a1ba2c32dc90bf112c11151bf912d317906d Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 14 Feb 2024 16:06:28 +0100 Subject: [PATCH 0294/1418] s390/cio: fix invalid -EBUSY on ccw_device_start commit 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 upstream. The s390 common I/O layer (CIO) returns an unexpected -EBUSY return code when drivers try to start I/O while a path-verification (PV) process is pending. This can lead to failed device initialization attempts with symptoms like broken network connectivity after boot. Fix this by replacing the -EBUSY return code with a deferred condition code 1 reply to make path-verification handling consistent from a driver's point of view. The problem can be reproduced semi-regularly using the following process, while repeating steps 2-3 as necessary (example assumes an OSA device with bus-IDs 0.0.a000-0.0.a002 on CHPID 0.02): 1. echo 0.0.a000,0.0.a001,0.0.a002 >/sys/bus/ccwgroup/drivers/qeth/group 2. echo 0 > /sys/bus/ccwgroup/devices/0.0.a000/online 3. echo 1 > /sys/bus/ccwgroup/devices/0.0.a000/online ; \ echo on > /sys/devices/css0/chp0.02/status Background information: The common I/O layer starts path-verification I/Os when it receives indications about changes in a device path's availability. This occurs for example when hardware events indicate a change in channel-path status, or when a manual operation such as a CHPID vary or configure operation is performed. If a driver attempts to start I/O while a PV is running, CIO reports a successful I/O start (ccw_device_start() return code 0). Then, after completion of PV, CIO synthesizes an interrupt response that indicates an asynchronous status condition that prevented the start of the I/O (deferred condition code 1). If a PV indication arrives while a device is busy with driver-owned I/O, PV is delayed until after I/O completion was reported to the driver's interrupt handler. To ensure that PV can be started eventually, CIO reports a device busy condition (ccw_device_start() return code -EBUSY) if a driver tries to start another I/O while PV is pending. In some cases this -EBUSY return code causes device drivers to consider a device not operational, resulting in failed device initialization. Note: The code that introduced the problem was added in 2003. Symptoms started appearing with the following CIO commit that causes a PV indication when a device is removed from the cio_ignore list after the associated parent subchannel device was probed, but before online processing of the CCW device has started: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") During boot, the cio_ignore list is modified by the cio_ignore dracut module [1] as well as Linux vendor-specific systemd service scripts[2]. When combined, this commit and boot scripts cause a frequent occurrence of the problem during boot. [1] https://github.com/dracutdevs/dracut/tree/master/modules.d/81cio_ignore [2] https://github.com/SUSE/s390-tools/blob/master/cio_ignore.service Cc: stable@vger.kernel.org # v5.15+ Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Tested-By: Thorsten Winkler Reviewed-by: Thorsten Winkler Signed-off-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/device_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dadc6bb..a5dba3829769c 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) -- GitLab From 287abdcb9e2b0836716655908d8f4cccd0925da8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Jan 2024 20:51:22 +0900 Subject: [PATCH 0295/1418] ata: libata-core: Do not try to set sleeping devices to standby commit 4b085736e44dbbe69b5eea1a8a294f404678a1f4 upstream. In ata ata_dev_power_set_standby(), check that the target device is not sleeping. If it is, there is no need to do anything. Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fa2fc1953fc26..f14e56a5cff6b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2005,6 +2005,10 @@ void ata_dev_power_set_active(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping, do nothing. */ + if (dev->flags & ATA_DFLAG_SLEEPING) + return; + /* * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only * if supported by the device. -- GitLab From 5583552eec30eb58acac404948e50609b070cb98 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:31:11 +0100 Subject: [PATCH 0296/1418] dm-crypt: recheck the integrity tag after a failure commit 42e15d12070b4ff9af2b980f1b65774c2dab0507 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-crypt reports an authentication error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-crypt, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 89 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0e6068ee783e7..cac9b609c63a7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -61,6 +61,8 @@ struct convert_context { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -81,6 +83,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1365,10 +1369,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1724,6 +1731,8 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; @@ -1735,6 +1744,8 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_queue_read(struct dm_crypt_io *io); + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1748,6 +1759,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1783,15 +1803,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1812,6 +1836,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2074,6 +2114,14 @@ dec: static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2103,11 +2151,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2150,10 +2204,13 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -3079,7 +3136,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } -- GitLab From 6437b0b4ddcd258b400baec9d4601ba9e19e3faf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 5 Feb 2024 10:39:20 +0100 Subject: [PATCH 0297/1418] Revert "parisc: Only list existing CPUs in cpu_possible_mask" commit 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 upstream. This reverts commit 0921244f6f4f0d05698b953fe632a99b38907226. It broke CPU hotplugging because it modifies the __cpu_possible_mask after bootup, so that it will be different than nr_cpu_ids, which then effictively breaks the workqueue setup code and triggers crashes when shutting down CPUs at runtime. Guenter was the first who noticed the wrong values in __cpu_possible_mask, since the cpumask Kunit tests were failig. Reverting this commit fixes both issues, but sadly brings back this uncritical runtime warning: register_cpu_capacity_sysctl: too early to get CPU4 device! Signed-off-by: Helge Deller Reported-by: Guenter Roeck Link: https://lkml.org/lkml/2024/2/4/146 Link: https://lore.kernel.org/lkml/Zb0mbHlIud_bqftx@slm.duckdns.org/t/ Cc: stable@vger.kernel.org # 6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/processor.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 1f6c776d80813..c67883487ecd3 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -171,7 +171,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -466,13 +465,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } -- GitLab From 906414f4596469004632de29126c55751ed82c5e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:27:39 +0100 Subject: [PATCH 0298/1418] dm-integrity: recheck the integrity tag after a failure commit c88f5e553fe38b2ffc4c33d08654e5281b297677 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-integrity reports an error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-integrity, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 93 +++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 77fcff82c82ac..10b01b2adc679 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -279,6 +279,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -1699,6 +1701,79 @@ failed: get_random_bytes(result, ic->tag_size); } +static void integrity_recheck(struct dm_integrity_io *dio) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + struct page *page; + void *buffer; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + char *mem; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, + checksum_onstack); + r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1784,15 +1859,8 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); + integrity_recheck(dio); + goto skip_io; } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -4208,6 +4276,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4572,6 +4646,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); -- GitLab From e08c2a8d27e989f0f5b0888792643027d7e691e6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:30:10 +0100 Subject: [PATCH 0299/1418] dm-crypt: don't modify the data when using authenticated encryption commit 50c70240097ce41fe6bce6478b80478281e4d0f7 upstream. It was said that authenticated encryption could produce invalid tag when the data that is being encrypted is modified [1]. So, fix this problem by copying the data into the clone bio first and then encrypt them inside the clone bio. This may reduce performance, but it is needed to prevent the user from corrupting the device by writing data with O_DIRECT and modifying them at the same time. [1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cac9b609c63a7..3e215aa85b99a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2078,6 +2078,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); -- GitLab From 27c1ade6068fe4de9eee0fa1dc9393fd4b79246c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:28:09 +0100 Subject: [PATCH 0300/1418] dm-verity: recheck the hash after a failure commit 9177f3c0dea6143d05cac1bbd28668fd0e216d11 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-verity reports an error [1]. This commit fixes dm-verity, so that if hash verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the hash is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 86 ++++++++++++++++++++++++++++++++--- drivers/md/dm-verity.h | 6 +++ 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 4669923f4cfb4..3b0d0bcd6f0d6 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -474,6 +474,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -500,9 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -553,10 +608,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -578,6 +630,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -928,6 +984,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1364,6 +1424,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f3f6070084196..4620a98c99561 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include #include #include #include @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -84,6 +88,8 @@ struct dm_verity_io { struct work_struct work; + char *recheck_buffer; + /* * Three variably-size fields follow this struct: * -- GitLab From 031217128990d7f0ab8c46db1afb3cf1e075fd29 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 16 Feb 2024 17:01:13 +0100 Subject: [PATCH 0301/1418] cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window commit 0cab687205986491302cd2e440ef1d253031c221 upstream. The Linux CXL subsystem is built on the assumption that HPA == SPA. That is, the host physical address (HPA) the HDM decoder registers are programmed with are system physical addresses (SPA). During HDM decoder setup, the DVSEC CXL range registers (cxl-3.1, 8.1.3.8) are checked if the memory is enabled and the CXL range is in a HPA window that is described in a CFMWS structure of the CXL host bridge (cxl-3.1, 9.18.1.3). Now, if the HPA is not an SPA, the CXL range does not match a CFMWS window and the CXL memory range will be disabled then. The HDM decoder stops working which causes system memory being disabled and further a system hang during HDM decoder initialization, typically when a CXL enabled kernel boots. Prevent a system hang and do not disable the HDM decoder if the decoder's CXL range is not found in a CFMWS window. Note the change only fixes a hardware hang, but does not implement HPA/SPA translation. Support for this can be added in a follow on patch series. Signed-off-by: Robert Richter Fixes: 34e37b4c432c ("cxl/port: Enable HDM Capability after validating DVSEC Ranges") Cc: Link: https://lore.kernel.org/r/20240216160113.407141-1-rrichter@amd.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 003a44132418a..5584af15300a8 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -376,9 +376,9 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* -- GitLab From f49b20fd0134da84a6bd8108f9e73c077b7d6231 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 14 Feb 2024 23:43:56 +0900 Subject: [PATCH 0302/1418] scsi: target: pscsi: Fix bio_put() for error case commit de959094eb2197636f7c803af0943cb9d3b35804 upstream. As of commit 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper"), a bio allocated by bio_kmalloc() must be freed by bio_uninit() and kfree(). That is not done properly for the error case, hitting WARN and NULL pointer dereference in bio_free(). Fixes: 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Link: https://lore.kernel.org/r/20240214144356.101814-1-naohiro.aota@wdc.com Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pscsi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 69a4c9581e80e..7aec34c090661 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -910,12 +910,15 @@ new_bio: return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; -- GitLab From e3bf0a24e050538d3e4c8d8319b8e04e18188ce7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Feb 2024 17:14:11 -0500 Subject: [PATCH 0303/1418] scsi: core: Consult supported VPD page list prior to fetching page commit b5fc07a5fb56216a49e6c1d0b172d5464d99a89b upstream. Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") removed the logic which checks whether a VPD page is present on the supported pages list before asking for the page itself. That was done because SPC helpfully states "The Supported VPD Pages VPD page list may or may not include all the VPD pages that are able to be returned by the device server". Testing had revealed a few devices that supported some of the 0xBn pages but didn't actually list them in page 0. Julian Sikorski bisected a problem with his drive resetting during discovery to the commit above. As it turns out, this particular drive firmware will crash if we attempt to fetch page 0xB9. Various approaches were attempted to work around this. In the end, reinstating the logic that consults VPD page 0 before fetching any other page was the path of least resistance. A firmware update for the devices which originally compelled us to remove the check has since been released. Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") Cc: stable@vger.kernel.org Cc: Bart Van Assche Reported-by: Julian Sikorski Tested-by: Julian Sikorski Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi.c | 22 ++++++++++++++++++++-- include/scsi/scsi_device.h | 4 ---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3cda5d26b66ca..e70ab8db30142 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; + /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index fdc31fdb612da..d2751ed536df2 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; -- GitLab From 2dedda77d4493f3e92e414b272bfa60f1f51ed95 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: [PATCH 0304/1418] mm/swap: fix race when skipping swapcache commit 13ddaf26be324a7f951891ecd9ccd04466d27458 upstream. When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 5 +++++ mm/memory.c | 20 ++++++++++++++++++++ mm/swap.h | 5 +++++ mm/swapfile.c | 13 +++++++++++++ 4 files changed, 43 insertions(+) diff --git a/include/linux/swap.h b/include/linux/swap.h index a18cf4b7c724c..add47f43e568e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -571,6 +571,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/mm/memory.c b/mm/memory.c index fc8b264ec0cac..fb83cf56377ab 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3761,6 +3761,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3822,6 +3823,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4073,6 +4088,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4086,6 +4104,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/swap.h b/mm/swap.h index cc08c459c6190..5eff40ef76934 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -39,6 +39,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); @@ -98,6 +99,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 71db6d8a1ea30..cca9fda9d036f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3373,6 +3373,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); -- GitLab From 4c815c3a48349842cc43a4101f7387af8904a0fb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:25 -0800 Subject: [PATCH 0305/1418] mm/damon/lru_sort: fix quota status loss due to online tunings commit 13d0599ab3b2ff17f798353f24bcbef1659d3cfc upstream. For online parameters change, DAMON_LRU_SORT creates new schemes based on latest values of the parameters and replaces the old schemes with the new one. When creating it, the internal status of the quotas of the old schemes is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-3-sj@kernel.org Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Signed-off-by: SeongJae Park Cc: [6.0+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 63bdad20dbaf8..98a678129b067 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, -- GitLab From 19e5dc2e6bf7cb2160bbb996f18f3bfb4621e770 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 13 Feb 2024 03:16:34 -0500 Subject: [PATCH 0306/1418] mm: memcontrol: clarify swapaccount=0 deprecation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 118642d7f606fc9b9c92ee611275420320290ffb upstream. The swapaccount deprecation warning is throwing false positives. Since we deprecated the knob and defaulted to enabling, the only reports we've been getting are from folks that set swapaccount=1. While this is a nice affirmation that always-enabling was the right choice, we certainly don't want to warn when users request the supported mode. Only warn when disabling is requested, and clarify the warning. [colin.i.king@gmail.com: spelling: "commdandline" -> "commandline"] Link: https://lkml.kernel.org/r/20240215090544.1649201-1-colin.i.king@gmail.com Link: https://lkml.kernel.org/r/20240213081634.3652326-1-hannes@cmpxchg.org Fixes: b25806dcd3d5 ("mm: memcontrol: deprecate swapaccounting=0 mode") Signed-off-by: Colin Ian King Reported-by: "Jonas Schäfer" Reported-by: Narcis Garcia Suggested-by: Yosry Ahmed Signed-off-by: Johannes Weiner Reviewed-by: Yosry Ahmed Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9da98e3e71cfe..4570d3e315cf1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7517,9 +7517,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); -- GitLab From fff39f496265edd728457ac39030c52a2113aa13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:33:00 +0100 Subject: [PATCH 0307/1418] platform/x86: intel-vbtn: Stop calling "VBDL" from notify_handler commit 84c16d01ff219bc0a5dca5219db6b8b86a6854fb upstream. Commit 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") causes 2 issues on the ThinkPad X1 Tablet Gen2: 1. The ThinkPad will wake up immediately from suspend 2. When put in tablet mode SW_TABLET_MODE reverts to 0 after about 1 second Both these issues are caused by the "VBDL" ACPI method call added at the end of the notify_handler. And it never became entirely clear if this call is even necessary to fix the issue of missing tablet-mode-switch events on the Dell Inspiron 7352. Drop the "VBDL" ACPI method call again to fix the 2 issues this is causing on the ThinkPad X1 Tablet Gen2. Fixes: 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") Reported-by: Alexander Kobel Closes: https://lore.kernel.org/platform-driver-x86/295984ce-bd4b-49bd-adc5-ffe7c898d7f0@a-kobel.de/ Cc: regressions@lists.linux.dev Cc: Arnold Gozum Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Tested-by: Alexander Kobel Link: https://lore.kernel.org/r/20240216203300.245826-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel/vbtn.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 8e2b07ed2ce94..c10c99a31a90a 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* -- GitLab From 9e7fc40377ec28d15a46cf59c413616fe9a78d57 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2024 13:06:07 +0100 Subject: [PATCH 0308/1418] platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names commit dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 upstream. On some devices the ACPI name of the touchscreen is e.g. either MSSL1680:00 or MSSL1680:01 depending on the BIOS version. This happens for example on the "Chuwi Hi8 Air" tablet where the initial commit's ts_data uses "MSSL1680:00" but the tablets from the github issue and linux-hardware.org probe linked below both use "MSSL1680:01". Replace the strcmp() match on ts_data->acpi_name with a strstarts() check to allow using a partial match on just the ACPI HID of "MSSL1680" and change the ts_data->acpi_name for the "Chuwi Hi8 Air" accordingly to fix the touchscreen not working on models where it is "MSSL1680:01". Note this drops the length check for I2C_NAME_SIZE. This never was necessary since the ACPI names used are never more then 11 chars and I2C_NAME_SIZE is 20 so the replaced strncmp() would always stop long before reaching I2C_NAME_SIZE. Link: https://linux-hardware.org/?computer=AC4301C0542A Fixes: bbb97d728f77 ("platform/x86: touchscreen_dmi: Add info for the Chuwi Hi8 Air tablet") Closes: https://github.com/onitake/gsl-firmware/issues/91 Cc: stable@vger.kernel.org Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240212120608.30469-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/touchscreen_dmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 50ec19188a20d..11d72a3533552 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -50,7 +50,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -1776,7 +1776,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); -- GitLab From 8b218e2f0a27a9f09428b1847b4580640b9d1e58 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Sat, 17 Feb 2024 16:14:31 +0800 Subject: [PATCH 0309/1418] cachefiles: fix memory leak in cachefiles_add_cache() commit e21a2f17566cbd64926fb8f16323972f7a064444 upstream. The following memory leak was reported after unbinding /dev/cachefiles: ================================================================== unreferenced object 0xffff9b674176e3c0 (size 192): comm "cachefilesd2", pid 680, jiffies 4294881224 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc ea38a44b): [] kmem_cache_alloc+0x2d5/0x370 [] prepare_creds+0x26/0x2e0 [] cachefiles_determine_cache_security+0x1f/0x120 [] cachefiles_add_cache+0x13c/0x3a0 [] cachefiles_daemon_write+0x146/0x1c0 [] vfs_write+0xcb/0x520 [] ksys_write+0x69/0xf0 [] do_syscall_64+0x72/0x140 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 ================================================================== Put the reference count of cache_cred in cachefiles_daemon_unbind() to fix the problem. And also put cache_cred in cachefiles_add_cache() error branch to avoid memory leaks. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") CC: stable@vger.kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240217081431.796809-1-libaokun1@huawei.com Acked-by: David Howells Reviewed-by: Jingbo Xu Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/cache.c | 2 ++ fs/cachefiles/daemon.c | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6f474..f449f7340aad0 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ error_unsupported: dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index aa4efcabb5e37..5f4df9588620f 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); -- GitLab From 6b2ff10390b19a2364af622b6666b690443f9f3f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:51 +0800 Subject: [PATCH 0310/1418] md: Fix missing release of 'active_io' for flush commit 855678ed8534518e2b428bcbcec695de9ba248e8 upstream. submit_flushes atomic_set(&mddev->flush_pending, 1); rdev_for_each_rcu(rdev, mddev) atomic_inc(&mddev->flush_pending); bi->bi_end_io = md_end_flush submit_bio(bi); /* flush io is done first */ md_end_flush if (atomic_dec_and_test(&mddev->flush_pending)) percpu_ref_put(&mddev->active_io) -> active_io is not released if (atomic_dec_and_test(&mddev->flush_pending)) -> missing release of active_io For consequence, mddev_suspend() will wait for 'active_io' to be zero forever. Fix this problem by releasing 'active_io' in submit_flushes() if 'flush_pending' is decreased to zero. Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Blazej Kucman Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@linux.intel.com/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-7-yukuai1@huaweicloud.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index c7efe15229514..846bdee4daa0e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -564,8 +564,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) -- GitLab From 72fdbc728c339413f2fee7c042ebc124d6b223d2 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:32 +0000 Subject: [PATCH 0311/1418] KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler commit 85a71ee9a0700f6c18862ef3b0011ed9dad99aca upstream. It is possible that an LPI mapped in a different ITS gets unmapped while handling the MOVALL command. If that is the case, there is no state that can be migrated to the destination. Silently ignore it and continue migrating other LPIs. Cc: stable@vger.kernel.org Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 3c344e4cd4cad..f033b9b0363ff 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1427,6 +1427,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); -- GitLab From 3ac3624a74cf251cf7c32b6b29231d4696e9291d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:31 +0000 Subject: [PATCH 0312/1418] KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table() commit 8d3a7dfb801d157ac423261d7cd62c33e95375f8 upstream. vgic_get_irq() may not return a valid descriptor if there is no ITS that holds a valid translation for the specified INTID. If that is the case, it is safe to silently ignore it and continue processing the LPI pending table. Cc: stable@vger.kernel.org Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index f033b9b0363ff..092327665a6ef 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); -- GitLab From 3963f16cc7643b461271989b712329520374ad2a Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Wed, 14 Feb 2024 19:27:33 +0300 Subject: [PATCH 0313/1418] gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp() commit 136cfaca22567a03bbb3bf53a43d8cb5748b80ec upstream. The gtp_net_ops pernet operations structure for the subsystem must be registered before registering the generic netlink family. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp] Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86 df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74 RSP: 0018:ffff888014107220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? show_regs+0x90/0xa0 ? die_addr+0x50/0xd0 ? exc_general_protection+0x148/0x220 ? asm_exc_general_protection+0x22/0x30 ? gtp_genl_dump_pdp+0x1be/0x800 [gtp] ? __alloc_skb+0x1dd/0x350 ? __pfx___alloc_skb+0x10/0x10 genl_dumpit+0x11d/0x230 netlink_dump+0x5b9/0xce0 ? lockdep_hardirqs_on_prepare+0x253/0x430 ? __pfx_netlink_dump+0x10/0x10 ? kasan_save_track+0x10/0x40 ? __kasan_kmalloc+0x9b/0xa0 ? genl_start+0x675/0x970 __netlink_dump_start+0x6fc/0x9f0 genl_family_rcv_msg_dumpit+0x1bb/0x2d0 ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10 ? genl_op_from_small+0x2a/0x440 ? cap_capable+0x1d0/0x240 ? __pfx_genl_start+0x10/0x10 ? __pfx_genl_dumpit+0x10/0x10 ? __pfx_genl_done+0x10/0x10 ? security_capable+0x9d/0xe0 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index bace989591f75..937dd9cf4fbaf 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1906,20 +1906,20 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_pernet_subsys; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); error_out: -- GitLab From 62f361bfea60c6afc3df09c1ad4152e6507f6f47 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 30 Jan 2024 19:27:40 +0800 Subject: [PATCH 0314/1418] crypto: virtio/akcipher - Fix stack overflow on memcpy commit c0ec2a712daf133d9996a8a1b7ee2d4996080363 upstream. sizeof(struct virtio_crypto_akcipher_session_para) is less than sizeof(struct virtio_crypto_op_ctrl_req::u), copying more bytes from stack variable leads stack overflow. Clang reports this issue by commands: make -j CC=clang-14 mrproper >/dev/null 2>&1 make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1 make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/ virtio_crypto_akcipher_algs.o Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm") Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/ Cc: Signed-off-by: zhenwei pi Tested-by: Nathan Chancellor # build Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 168195672e2e1..d2df97cfcb294 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); -- GitLab From 75eaa3666e2cce5656e8f0564a845a5da5a9617a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 19 Feb 2024 18:58:06 +0000 Subject: [PATCH 0315/1418] irqchip/gic-v3-its: Do not assume vPE tables are preallocated commit ec4308ecfc887128a468f03fb66b767559c57c23 upstream. The GIC/ITS code is designed to ensure to pick up any preallocated LPI tables on the redistributors, as enabling LPIs is a one-way switch. There is no such restriction for vLPIs, and for GICv4.1 it is expected to allocate a new vPE table at boot. This works as intended when initializing an ITS, however when setting up a redistributor in cpu_init_lpis() the early return for preallocated RD tables skips straight past the GICv4 setup. This all comes to a head when trying to kexec() into a new kernel, as the new kernel silently fails to set up GICv4, leading to a complete loss of SGIs and LPIs for KVM VMs. Slap a band-aid on the problem by ensuring its_cpu_init_lpis() always initializes GICv4 on the way out, even if the other RD tables were preallocated. Fixes: 6479450f72c1 ("irqchip/gic-v4: Fix occasional VLPI drop") Reported-by: George Cherian Co-developed-by: Marc Zyngier Signed-off-by: Marc Zyngier Signed-off-by: Oliver Upton Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240219185809.286724-2-oliver.upton@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index b83b39e93e1a9..4d03fb3a82460 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3161,6 +3161,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3196,7 +3197,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), -- GitLab From e90211b1f7ace2a57838343c2a2847cecebead7d Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 31 Jan 2024 09:19:33 +0100 Subject: [PATCH 0316/1418] irqchip/sifive-plic: Enable interrupt if needed before EOI commit 9c92006b896c767218aabe8947b62026a571cfd0 upstream. RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as explained in the description of Interrupt Completion in the PLIC spec: "The PLIC signals it has completed executing an interrupt handler by writing the interrupt ID it received from the claim to the claim/complete register. The PLIC does not check whether the completion ID is the same as the last claim ID for that target. If the completion ID does not match an interrupt source that *is currently enabled* for the target, the completion is silently ignored." Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked") ensured that EOI is successful by enabling interrupt first, before EOI. Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") removed the interrupt enabling code from the previous commit, because it assumes that interrupt should already be enabled at the point of EOI. However, this is incorrect: there is a window after a hart claiming an interrupt and before irq_desc->lock getting acquired, interrupt can be disabled during this window. Thus, EOI can be invoked while the interrupt is disabled, effectively nullify this EOI. This results in the interrupt never gets asserted again, and the device who uses this interrupt appears frozen. Make sure that interrupt is really enabled before EOI. Fixes: a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Samuel Holland Cc: Marc Zyngier Cc: Guo Ren Cc: linux-riscv@lists.infradead.org Cc: Link: https://lore.kernel.org/r/20240131081933.144512-1-namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-sifive-plic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 2f4784860df5d..be5e19a86ac3b 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -144,7 +144,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP -- GitLab From 2a19e0042bf14c9f5386b71d17089e2c6fc7dded Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 15 Jan 2024 19:26:49 +0530 Subject: [PATCH 0317/1418] PCI/MSI: Prevent MSI hardware interrupt number truncation commit db744ddd59be798c2627efbfc71f707f5a935a40 upstream. While calculating the hardware interrupt number for a MSI interrupt, the higher bits (i.e. from bit-5 onwards a.k.a domain_nr >= 32) of the PCI domain number gets truncated because of the shifted value casting to return type of pci_domain_nr() which is 'int'. This for example is resulting in same hardware interrupt number for devices 0019:00:00.0 and 0039:00:00.0. To address this cast the PCI domain number to 'irq_hw_number_t' before left shifting it to calculate the hardware interrupt number. Please note that this fixes the issue only on 64-bit systems and doesn't change the behavior for 32-bit systems i.e. the 32-bit systems continue to have the issue. Since the issue surfaces only if there are too many PCIe controllers in the system which usually is the case in modern server systems and they don't tend to run 32-bit kernels. Fixes: 3878eaefb89a ("PCI/MSI: Enhance core to support hierarchy irqdomain") Signed-off-by: Vidya Sagar Signed-off-by: Thomas Gleixner Tested-by: Shanker Donthineni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115135649.708536-1-vidyas@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index e9cf318e6670f..34877a1f43a15 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -60,7 +60,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc) -- GitLab From 13cd1daeea848614e585b2c6ecc11ca9c8ab2500 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 20 Feb 2024 12:21:56 +0000 Subject: [PATCH 0318/1418] l2tp: pass correct message length to ip6_append_data commit 359e54a93ab43d32ee1bff3c2f9f10cb9f6b6e79 upstream. l2tp_ip6_sendmsg needs to avoid accounting for the transport header twice when splicing more data into an already partially-occupied skbuff. To manage this, we check whether the skbuff contains data using skb_queue_empty when deciding how much data to append using ip6_append_data. However, the code which performed the calculation was incorrect: ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; ...due to C operator precedence, this ends up setting ulen to transhdrlen for messages with a non-zero length, which results in corrupted packets on the wire. Add parentheses to correct the calculation in line with the original intent. Fixes: 9d4c75800f61 ("ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data()") Cc: David Howells Cc: stable@vger.kernel.org Signed-off-by: Tom Parkin Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220122156.43131-1-tparkin@katalix.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 314ec3a51e8de..bb92dc8b82f39 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, -- GitLab From 786f089086b505372fb3f4f008d57e7845fff0d8 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 5 Feb 2024 11:23:34 +0100 Subject: [PATCH 0319/1418] ARM: ep93xx: Add terminator to gpiod_lookup_table commit fdf87a0dc26d0550c60edc911cda42f9afec3557 upstream. Without the terminator, if a con_id is passed to gpio_find() that does not exist in the lookup table the function will not stop looping correctly, and eventually cause an oops. Cc: stable@vger.kernel.org Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors") Reported-by: Andy Shevchenko Signed-off-by: Nikita Shubin Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240205102337.439002-1-alexander.sverdlin@gmail.com Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-ep93xx/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 95e731676cea4..961daac653261 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; -- GitLab From 1221b8ea25cc8db5096fa328b835b8b6fdb6180b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:25 +0200 Subject: [PATCH 0320/1418] x86/returnthunk: Allow different return thunks Upstream commit: 770ae1b709528a6a173b5c7b183818ee9b45e376 In preparation for call depth tracking on Intel SKL CPUs, make it possible to patch in a SKL specific return thunk. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.680469665@infradead.org Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/kernel/alternative.c | 17 +++++++++++++---- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- 5 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2f123d4fb85b5..04035edc5c758 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -222,6 +222,12 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_CALL_THUNKS +extern void (*x86_return_thunk)(void); +#else +#define x86_return_thunk (&__x86_return_thunk) +#endif + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6b8c93989aa31..5c11f688a1011 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -536,6 +536,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } #ifdef CONFIG_RETHUNK + +#ifdef CONFIG_CALL_THUNKS +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; +#endif + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -551,14 +556,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - return -1; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (x86_return_thunk == __x86_return_thunk) + return -1; - bytes[i++] = RET_INSN_OPCODE; + i = JMP32_INSN_SIZE; + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); + } else { + bytes[i++] = RET_INSN_OPCODE; + } for (; i < insn->length;) bytes[i++] = INT3_INSN_OPCODE; - return i; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e07234ec7e237..ec51ce713dea4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -361,7 +361,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 3fbb491688275..b32134b093ec8 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -80,7 +80,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case RET: if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69aee6245e4a..7913440c0fd46 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip) u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - emit_jump(&prog, &__x86_return_thunk, ip); + emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if (IS_ENABLED(CONFIG_SLS)) -- GitLab From b012dcf39d9e79f9a71aa72b485c740aba84b47f Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 24 Feb 2024 12:01:34 +0100 Subject: [PATCH 0321/1418] Revert "x86/alternative: Make custom return thunk unconditional" This reverts commit 53ebbe1c8c02aa7b7f072dd2f96bca4faa1daa59. Revert the backport of upstream commit: 095b8303f383 ("x86/alternative: Make custom return thunk unconditional") in order to backport the full version now that 770ae1b70952 ("x86/returnthunk: Allow different return thunks") has been backported. Revert it here so that the build breakage is kept at minimum. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 ---- arch/x86/kernel/cpu/bugs.c | 2 -- 2 files changed, 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 04035edc5c758..44c78af98af4b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -205,11 +205,7 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; -#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); -#else -static inline void __x86_return_thunk(void) {} -#endif extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 13dffc43ded02..208e4938f35ba 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,8 +62,6 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); static DEFINE_MUTEX(spec_ctrl_mutex); -void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; - /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { -- GitLab From 545a94ffc29a4afb3bec7cbb4b830cc9c35fad7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:30 +0200 Subject: [PATCH 0322/1418] x86/alternative: Make custom return thunk unconditional Upstream commit: 095b8303f3835c68ac4a8b6d754ca1c3b6230711 There is infrastructure to rewrite return thunks to point to any random thunk one desires, unwrap that from CALL_THUNKS, which up to now was the sole user of that. [ bp: Make the thunks visible on 32-bit and add ifdeffery for the 32-bit builds. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.775293785@infradead.org Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 8 ++++---- arch/x86/kernel/alternative.c | 4 ---- arch/x86/kernel/cpu/bugs.c | 2 ++ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 44c78af98af4b..d3706de91a934 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -205,7 +205,11 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; +#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); +#else +static inline void __x86_return_thunk(void) {} +#endif extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); @@ -218,11 +222,7 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); -#ifdef CONFIG_CALL_THUNKS extern void (*x86_return_thunk)(void); -#else -#define x86_return_thunk (&__x86_return_thunk) -#endif #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5c11f688a1011..69f85e2746119 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -537,10 +537,6 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_RETHUNK -#ifdef CONFIG_CALL_THUNKS -void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; -#endif - /* * Rewrite the compiler generated return thunk tail-calls. * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 208e4938f35ba..13dffc43ded02 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,6 +62,8 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); static DEFINE_MUTEX(spec_ctrl_mutex); +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { -- GitLab From 943c8b1fcc86210ac7a62cbba77360936554477c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 24 Feb 2024 14:48:03 +0100 Subject: [PATCH 0323/1418] dm-integrity, dm-verity: reduce stack usage for recheck commit 66ad2fbcdbeab0edfd40c5d94f32f053b98c2320 upstream. The newly added integrity_recheck() function has another larger stack allocation, just like its caller integrity_metadata(). When it gets inlined, the combination of the two exceeds the warning limit for 32-bit architectures and possibly risks an overflow when this is called from a deep call chain through a file system: drivers/md/dm-integrity.c:1767:13: error: stack frame size (1048) exceeds limit (1024) in 'integrity_metadata' [-Werror,-Wframe-larger-than] 1767 | static void integrity_metadata(struct work_struct *w) Since the caller at this point is done using its checksum buffer, just reuse the same buffer in the new function to avoid the double allocation. [Mikulas: add "noinline" to integrity_recheck and verity_recheck. These functions are only called on error, so they shouldn't bloat the stack frame or code size of the caller.] Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Fixes: 9177f3c0dea6 ("dm-verity: recheck the hash after a failure") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 10 ++++------ drivers/md/dm-verity-target.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 10b01b2adc679..3da4359f51645 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1701,14 +1701,13 @@ failed: get_random_bytes(result, ic->tag_size); } -static void integrity_recheck(struct dm_integrity_io *dio) +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) { struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); struct dm_integrity_c *ic = dio->ic; struct bvec_iter iter; struct bio_vec bv; sector_t sector, logical_sector, area, offset; - char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; struct page *page; void *buffer; @@ -1744,9 +1743,8 @@ static void integrity_recheck(struct dm_integrity_io *dio) goto free_ret; } - integrity_sector_checksum(ic, logical_sector, buffer, - checksum_onstack); - r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, &dio->metadata_offset, ic->tag_size, TAG_CMP); if (r) { if (r > 0) { @@ -1859,7 +1857,7 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - integrity_recheck(dio); + integrity_recheck(dio, checksums); goto skip_io; } if (likely(checksums != checksums_onstack)) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 3b0d0bcd6f0d6..b48e1b59e6da4 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -483,8 +483,8 @@ static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, return 0; } -static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter start, sector_t cur_block) +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) { struct page *page; void *buffer; -- GitLab From d9d24262535360fceebacaf93eaba281d4135c52 Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Wed, 21 Feb 2024 13:03:47 -0800 Subject: [PATCH 0324/1418] erofs: fix refcount on the metabuf used for inode lookup commit 56ee7db31187dc36d501622cb5f1415e88e01c2a upstream. In erofs_find_target_block() when erofs_dirnamecmp() returns 0, we do not assign the target metabuf. This causes the caller erofs_namei()'s erofs_put_metabuf() at the end to be not effective leaving the refcount on the page. As the page from metabuf (buf->page) is never put, such page cannot be migrated or reclaimed. Fix it now by putting the metabuf from previous loop and assigning the current metabuf to target before returning so caller erofs_namei() can do the final put as it was intended. Fixes: 500edd095648 ("erofs: use meta buffers for inode lookup") Cc: # 5.18+ Signed-off-by: Sandeep Dhavale Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240221210348.3667795-1-dhavale@google.com Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- fs/erofs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 0dc34721080c7..e8ccaa761bd63 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -137,24 +137,24 @@ static void *find_target_block_classic(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; + } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ -- GitLab From d4c7e4b1b0249598d2093d09be5a286f81fc4a36 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 16 Feb 2024 23:47:08 +0100 Subject: [PATCH 0325/1418] serial: amba-pl011: Fix DMA transmission in RS485 mode commit 3b69e32e151bc4a4e3c785cbdb1f918d5ee337ed upstream. When DMA is used in RS485 mode make sure that the UARTs tx section is enabled before the DMA buffers are queued for transmission. Cc: stable@vger.kernel.org Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20240216224709.9928-2-l.sanfilippo@kunbus.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index c74eaf2552c32..2f0f05259778a 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1345,11 +1345,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1431,42 +1461,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; -- GitLab From 4dc87908b1836ac7194fd4bd2c89ff3d79ebc075 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 16 Feb 2024 00:41:02 +0000 Subject: [PATCH 0326/1418] usb: dwc3: gadget: Don't disconnect if not started commit b191a18cb5c47109ca696370a74a5062a70adfd0 upstream. Don't go through soft-disconnection sequence if the controller hasn't started. Otherwise, there will be timeout and warning reports from the soft-disconnection flow. Cc: stable@vger.kernel.org Fixes: 61a348857e86 ("usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-usb/20240215233536.7yejlj3zzkl23vjd@synopsys.com/T/#mb0661cd5f9272602af390c18392b9a36da4f96e6 Tested-by: Marek Szyprowski Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/e3be9b929934e0680a6f4b8f6eb11b18ae9c7e07.1708043922.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 576c21bf77cda..b134110cc2ed5 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2548,6 +2548,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* -- GitLab From 748cee4417f68b09ca8dbf8b702803ad2d082010 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 6 Feb 2024 11:40:18 +0100 Subject: [PATCH 0327/1418] usb: cdnsp: blocked some cdns3 specific code commit 18a6be674306c9acb05c08e5c3fd376ef50a917c upstream. host.c file has some parts of code that were introduced for CDNS3 driver and should not be used with CDNSP driver. This patch blocks using these parts of codes by CDNSP driver. These elements include: - xhci_plat_cdns3_xhci object - cdns3 specific XECP_PORT_CAP_REG register - cdns3 specific XECP_AUX_CTRL_REG1 register cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240206104018.48272-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/host.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c96a49..ceca4d839dfd4 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; -- GitLab From c66a8008489bddef8d9bab3545a7d868d95a8b38 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 15 Feb 2024 13:16:09 +0100 Subject: [PATCH 0328/1418] usb: cdnsp: fixed issue with incorrect detecting CDNSP family controllers commit 47625b018c6bc788bc10dd654c82696eb0a5ef11 upstream. Cadence have several controllers from 0x000403xx family but current driver suuport detecting only one with DID equal 0x0004034E. It causes that if someone uses different CDNSP controller then driver will use incorrect version and register space. Patch fix this issue. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240215121609.259772-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 1 - drivers/usb/cdns3/drd.c | 13 +++++++++---- drivers/usb/cdns3/drd.h | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 7b20d2d5c262e..7242591b346bc 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -394,7 +394,6 @@ pm_put: return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index d00ff98dffabf..33ba30f79b337 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -418,15 +419,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -479,7 +485,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73ed91..d72370c321d39 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. -- GitLab From 2134e9906e17b1e5284300fab547869ebacfd7d9 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:16 -0500 Subject: [PATCH 0329/1418] usb: cdns3: fixed memory use after free at cdns3_gadget_ep_disable() commit cd45f99034b0c8c9cb346dd0d6407a95ca3d36f6 upstream. ... cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); list_del_init(&priv_req->list); ... 'priv_req' actually free at cdns3_gadget_ep_free_request(). But list_del_init() use priv_req->list after it. [ 1542.642868][ T534] BUG: KFENCE: use-after-free read in __list_del_entry_valid+0x10/0xd4 [ 1542.642868][ T534] [ 1542.653162][ T534] Use-after-free read at 0x000000009ed0ba99 (in kfence-#3): [ 1542.660311][ T534] __list_del_entry_valid+0x10/0xd4 [ 1542.665375][ T534] cdns3_gadget_ep_disable+0x1f8/0x388 [cdns3] [ 1542.671571][ T534] usb_ep_disable+0x44/0xe4 [ 1542.675948][ T534] ffs_func_eps_disable+0x64/0xc8 [ 1542.680839][ T534] ffs_func_set_alt+0x74/0x368 [ 1542.685478][ T534] ffs_func_disable+0x18/0x28 Move list_del_init() before cdns3_gadget_ep_free_request() to resolve this problem. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index ccdd525bd7c80..fb7d9d35cf625 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2537,11 +2537,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } -- GitLab From 9a52b694b066f299d8b9800854a8503457a8b64c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:17 -0500 Subject: [PATCH 0330/1418] usb: cdns3: fix memory double free when handle zero packet commit 5fd9e45f1ebcd57181358af28506e8a661a260b3 upstream. 829 if (request->complete) { 830 spin_unlock(&priv_dev->lock); 831 usb_gadget_giveback_request(&priv_ep->endpoint, 832 request); 833 spin_lock(&priv_dev->lock); 834 } 835 836 if (request->buf == priv_dev->zlp_buf) 837 cdns3_gadget_ep_free_request(&priv_ep->endpoint, request); Driver append an additional zero packet request when queue a packet, which length mod max packet size is 0. When transfer complete, run to line 831, usb_gadget_giveback_request() will free this requestion. 836 condition is true, so cdns3_gadget_ep_free_request() free this request again. Log: [ 1920.140696][ T150] BUG: KFENCE: use-after-free read in cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.140696][ T150] [ 1920.151837][ T150] Use-after-free read at 0x000000003d1cd10b (in kfence-#36): [ 1920.159082][ T150] cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.164988][ T150] cdns3_transfer_completed+0x438/0x5f8 [cdns3] Add check at line 829, skip call usb_gadget_giveback_request() if it is additional zero length packet request. Needn't call usb_gadget_giveback_request() because it is allocated in this driver. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-2-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index fb7d9d35cf625..2b8f98f0707e7 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -826,7 +826,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); -- GitLab From 35b604a37ec70d68b19dafd10bbacf1db505c9ca Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 5 Feb 2024 13:16:50 +0530 Subject: [PATCH 0331/1418] usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76c51146820c5dac629f21deafab0a7039bc3ccd upstream. It is observed sometimes when tethering is used over NCM with Windows 11 as host, at some instances, the gadget_giveback has one byte appended at the end of a proper NTB. When the NTB is parsed, unwrap call looks for any leftover bytes in SKB provided by u_ether and if there are any pending bytes, it treats them as a separate NTB and parses it. But in case the second NTB (as per unwrap call) is faulty/corrupt, all the datagrams that were parsed properly in the first NTB and saved in rx_list are dropped. Adding a few custom traces showed the following: [002] d..1 7828.532866: dwc3_gadget_giveback: ep1out: req 000000003868811a length 1025/16384 zsI ==> 0 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb toprocess: 1025 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb seq: 0xce67 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x400 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb ndp_len: 0x10 [002] d..1 7828.532869: ncm_unwrap_ntb: K: Parsed NTB with 1 frames In this case, the giveback is of 1025 bytes and block length is 1024. The rest 1 byte (which is 0x00) won't be parsed resulting in drop of all datagrams in rx_list. Same is case with packets of size 2048: [002] d..1 7828.557948: dwc3_gadget_giveback: ep1out: req 0000000011dfd96e length 2049/16384 zsI ==> 0 [002] d..1 7828.557949: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.557950: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x800 Lecroy shows one byte coming in extra confirming that the byte is coming in from PC: Transfer 2959 - Bytes Transferred(1025) Timestamp((18.524 843 590) - Transaction 8391 - Data(1025 bytes) Timestamp(18.524 843 590) --- Packet 4063861 Data(1024 bytes) Duration(2.117us) Idle(14.700ns) Timestamp(18.524 843 590) --- Packet 4063863 Data(1 byte) Duration(66.160ns) Time(282.000ns) Timestamp(18.524 845 722) According to Windows driver, no ZLP is needed if wBlockLength is non-zero, because the non-zero wBlockLength has already told the function side the size of transfer to be expected. However, there are in-market NCM devices that rely on ZLP as long as the wBlockLength is multiple of wMaxPacketSize. To deal with such devices, it pads an extra 0 at end so the transfer is no longer multiple of wMaxPacketSize. Cc: Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240205074650.200304-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bbb6ff6b11aa1..5e78fcc63e4d3 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1340,7 +1340,15 @@ parse_ntb: "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if (to_process > 0) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } -- GitLab From 0158216805ca7e498d07de38840d2732166ae5fa Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:38 +0800 Subject: [PATCH 0332/1418] usb: roles: fix NULL pointer issue when put module's reference commit 1c9be13846c0b2abc2480602f8ef421360e1ad9e upstream. In current design, usb role class driver will get usb_role_switch parent's module reference after the user get usb_role_switch device and put the reference after the user put the usb_role_switch device. However, the parent device of usb_role_switch may be removed before the user put the usb_role_switch. If so, then, NULL pointer issue will be met when the user put the parent module's reference. This will save the module pointer in structure of usb_role_switch. Then, we don't need to find module by iterating long relations. Fixes: 5c54fcac9a9d ("usb: roles: Take care of driver module reference counting") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 32e6d19f7011a..d90d164238099 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -19,6 +19,7 @@ static struct class *role_class; struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; /* From descriptor */ @@ -133,7 +134,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -155,7 +156,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -170,7 +171,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -187,15 +188,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -337,6 +341,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = role_class; -- GitLab From 2f414a56b369129cfdac6ecaf9899f9807a264aa Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:39 +0800 Subject: [PATCH 0333/1418] usb: roles: don't get/set_role() when usb_role_switch is unregistered commit b787a3e781759026a6212736ef8e52cf83d1821a upstream. There is a possibility that usb_role_switch device is unregistered before the user put usb_role_switch. In this case, the user may still want to get/set_role() since the user can't sense the changes of usb_role_switch. This will add a flag to show if usb_role_switch is already registered and avoid unwanted behaviors. Fixes: fde0aa6c175a ("usb: common: Small class for USB role switches") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index d90d164238099..a327f8bc57043 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -21,6 +21,7 @@ struct usb_role_switch { struct mutex lock; /* device lock*/ struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -47,6 +48,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -72,7 +76,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -356,6 +360,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -370,8 +376,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); -- GitLab From 42a841a84ffd84f8166b9b4c5897a5f30feb7e1c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 14 Apr 2023 17:47:06 +0200 Subject: [PATCH 0334/1418] mptcp: make userspace_pm_append_new_local_addr static commit aa5887dca2d236fc50000e27023d4d78dce3af30 upstream. mptcp_userspace_pm_append_new_local_addr() has always exclusively been used in pm_userspace.c since its introduction in commit 4638de5aefe5 ("mptcp: handle local addrs announced by userspace PMs"). So make it static. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 4 ++-- net/mptcp/protocol.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2e1e0d0e3ec60..4acd31f84d213 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -25,8 +25,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } } -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) +static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 259672cc344f3..b092205213234 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -834,8 +834,6 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); -- GitLab From 9e8e59af3a4aad2494e0ea23c8bda2433a338349 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:28 +0100 Subject: [PATCH 0335/1418] mptcp: add needs_id for userspace appending addr commit 6c347be62ae963b301ead8e7fa7b9973e6e0d6e1 upstream. When userspace PM requires to create an ID 0 subflow in "userspace pm create id 0 subflow" test like this: userspace_pm_add_sf $ns2 10.0.3.2 0 An ID 1 subflow, in fact, is created. Since in mptcp_pm_nl_append_new_local_addr(), 'id 0' will be treated as no ID is set by userspace, and will allocate a new ID immediately: if (!e->addr.id) e->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); To solve this issue, a new parameter needs_id is added for mptcp_userspace_pm_append_new_local_addr() to distinguish between whether userspace PM has set an ID 0 or whether userspace PM has not set any address. needs_id is true in mptcp_userspace_pm_get_local_id(), but false in mptcp_pm_nl_announce_doit() and mptcp_pm_nl_subflow_create_doit(). Fixes: e5ed101a6028 ("mptcp: userspace pm allow creating id 0 subflow") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 4acd31f84d213..2895be3046f79 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -155,7 +156,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) @@ -197,7 +198,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -335,7 +336,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; -- GitLab From 71787c665d09a970b9280c285181d3a2d1bf3bb0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:30 +0100 Subject: [PATCH 0336/1418] mptcp: fix lockless access in subflow ULP diag commit b8adb69a7d29c2d33eb327bca66476fb6066516b upstream. Since the introduction of the subflow ULP diag interface, the dump callback accessed all the subflow data with lockless. We need either to annotate all the read and write operation accordingly, or acquire the subflow socket lock. Let's do latter, even if slower, to avoid a diffstat havoc. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/mptcp/diag.c | 6 +++++- net/tls/tls_main.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c838f7290dd9..8ea1fba84eff9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2290,7 +2290,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f28..e57c5f47f0351 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -69,11 +71,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 80b42a3e78830..6b7189a520af7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1098,7 +1098,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; -- GitLab From a6cada89ee5af2194f3ae62bebe4a821eaebe690 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Mon, 29 Jan 2024 17:33:40 -0500 Subject: [PATCH 0337/1418] Revert "drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz" commit a538dabf772c169641e151834e161e241802ab33 upstream. [why]: This reverts commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a. The commit caused corruption when running some applications in fullscreen Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Acked-by: Aurabindo Pillai Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index baecc0ffe7580..85e0d1c2a9085 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2123,7 +2123,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; -- GitLab From f0d857ce31a6bc7a82afcdbadb8f7417d482604b Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 12 Jan 2024 16:55:23 +0800 Subject: [PATCH 0338/1418] IB/hfi1: Fix a memleak in init_credit_return [ Upstream commit 809aa64ebff51eb170ee31a95f83b2d21efa32e2 ] When dma_alloc_coherent fails to allocate dd->cr_base[i].va, init_credit_return should deallocate dd->cr_base and dd->cr_base[i] that allocated before. Or those resources would be never freed and a memleak is triggered. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Zhipeng Lu Link: https://lore.kernel.org/r/20240112085523.3731720-1-alexious@zju.edu.cn Acked-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/pio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 51ae58c02b15c..802b0e5801a7d 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2089,7 +2089,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,6 +2097,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) -- GitLab From 75a64c641cf2c694556e8113e475235d7994c15e Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:36 -0800 Subject: [PATCH 0339/1418] RDMA/bnxt_re: Return error for SRQ resize [ Upstream commit 3687b450c5f32e80f179ce4b09e0454da1449eac ] SRQ resize is not supported in the driver. But driver is not returning error from bnxt_re_modify_srq() for SRQ resize. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4ed8814efde6f..6ed0568747eaa 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1710,7 +1710,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1725,13 +1725,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) -- GitLab From b2e4a5266e3d133b4c7f0e43bf40d13ce14fd1aa Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 31 Jan 2024 17:38:46 -0600 Subject: [PATCH 0340/1418] RDMA/irdma: Fix KASAN issue with tasklet [ Upstream commit bd97cea7b18a0a553773af806dfbfac27a7c4acb ] KASAN testing revealed the following issue assocated with freeing an IRQ. [50006.466686] Call Trace: [50006.466691] [50006.489538] dump_stack+0x5c/0x80 [50006.493475] print_address_description.constprop.6+0x1a/0x150 [50006.499872] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.505742] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.511644] kasan_report.cold.11+0x7f/0x118 [50006.516572] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.522473] irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.528232] irdma_process_ceq+0xb2/0x400 [irdma] [50006.533601] ? irdma_hw_flush_wqes_callback+0x370/0x370 [irdma] [50006.540298] irdma_ceq_dpc+0x44/0x100 [irdma] [50006.545306] tasklet_action_common.isra.14+0x148/0x2c0 [50006.551096] __do_softirq+0x1d0/0xaf8 [50006.555396] irq_exit_rcu+0x219/0x260 [50006.559670] irq_exit+0xa/0x20 [50006.563320] smp_apic_timer_interrupt+0x1bf/0x690 [50006.568645] apic_timer_interrupt+0xf/0x20 [50006.573341] The issue is that a tasklet could be pending on another core racing the delete of the irq. Fix by insuring any scheduled tasklet is killed after deleting the irq. Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-2-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 311a1138e838d..12bd38b1e2c15 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -562,6 +562,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** -- GitLab From 429999729d4a85126cec4a1305db67d9a7774545 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 31 Jan 2024 17:38:47 -0600 Subject: [PATCH 0341/1418] RDMA/irdma: Validate max_send_wr and max_recv_wr [ Upstream commit ee107186bcfd25d7873258f3f75440e20f5e6416 ] Validate that max_send_wr and max_recv_wr is within the supported range. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Change-Id: I2fc8b10292b641fddd20b36986a9dae90a93f4be Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-3-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 01faec6ea5285..0fbc39df1e7d4 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -762,7 +762,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { -- GitLab From 6f4553096eceefe5b41e4c1bc03d9892cb903284 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:48 -0600 Subject: [PATCH 0342/1418] RDMA/irdma: Set the CQ read threshold for GEN 1 [ Upstream commit 666047f3ece9f991774c1fe9b223139a9ef8908d ] The CQ shadow read threshold is currently not set for GEN 2. This could cause an invalid CQ overflow condition, so remove the GEN check that exclused GEN 1. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-4-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 0fbc39df1e7d4..42c671f209233 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2121,9 +2121,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); -- GitLab From edc2a9afbebda04a43dd9bb4217b1becce0efc03 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:49 -0600 Subject: [PATCH 0343/1418] RDMA/irdma: Add AE for too many RNRS [ Upstream commit 630bdb6f28ca9e5ff79e244030170ac788478332 ] Add IRDMA_AE_LLP_TOO_MANY_RNRS to the list of AE's processed as an abnormal asyncronous event. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-5-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/defs.h | 1 + drivers/infiniband/hw/irdma/hw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index ad54260cb58c9..ebe98fa2b1cd2 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -345,6 +345,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 12bd38b1e2c15..918a2d783141f 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -379,6 +379,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: -- GitLab From aee4dcfe17219fe60f2821923adea98549060af8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 4 Feb 2024 16:42:07 -0800 Subject: [PATCH 0344/1418] RDMA/srpt: Support specifying the srpt_service_guid parameter [ Upstream commit fdfa083549de5d50ebf7f6811f33757781e838c0 ] Make loading ib_srpt with this parameter set work. The current behavior is that setting that parameter while loading the ib_srpt kernel module triggers the following kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: parse_one+0x18c/0x1d0 parse_args+0xe1/0x230 load_module+0x8de/0xa60 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x181/0x240 __x64_sys_finit_module+0x5a/0xb0 do_syscall_64+0x5f/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Cc: LiHonggang Reported-by: LiHonggang Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240205004207.17031-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 25e799dba999e..4607d37b9224a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); -- GitLab From 8d3a5cbc1e92a0da912578afa44506679339aaf3 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:07 +0000 Subject: [PATCH 0345/1418] iommufd/iova_bitmap: Bounds check mapped::pages access [ Upstream commit a4ab7dedaee0e39b15653c5fd0367e420739f7ef ] Dirty IOMMU hugepages reported on a base page page-size granularity can lead to an attempt to set dirty pages in the bitmap beyond the limits that are pinned. Bounds check the page index of the array we are trying to access is within the limits before we kmap() and return otherwise. While it is also a defensive check, this is also in preparation to defer setting bits (outside the mapped range) to the next iteration(s) when the pages become available. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-2-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 0f19d502f351b..5b540a164c98f 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -405,6 +405,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -413,6 +414,9 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); -- GitLab From c5bc02f60d278637101837c3c295021efeca74d9 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:08 +0000 Subject: [PATCH 0346/1418] iommufd/iova_bitmap: Switch iova_bitmap::bitmap to an u8 array [ Upstream commit d18411ec305728c6371806c4fb09be07016aad0b ] iova_bitmap_mapped_length() don't deal correctly with the small bitmaps (< 2M bitmaps) when the starting address isn't u64 aligned, leading to skipping a tiny part of the IOVA range. This is materialized as not marking data dirty that should otherwise have been. Fix that by using a u8 * in the internal state of IOVA bitmap. Most of the data structures use the type of the bitmap to adjust its indexes, thus changing the type of the bitmap decreases the granularity of the bitmap indexes. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-3-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 5b540a164c98f..c748a1e3ba53a 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -99,7 +99,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -161,7 +161,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -246,7 +246,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -301,7 +301,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } -- GitLab From 47e93d2f286eb062175ee1d89128887f0a2e8dc5 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:15 +0000 Subject: [PATCH 0347/1418] iommufd/iova_bitmap: Consider page offset for the pages to be pinned [ Upstream commit 4bbcbc6ea2fa379632a24c14cfb47aa603816ac6 ] For small bitmaps that aren't PAGE_SIZE aligned *and* that are less than 512 pages in bitmap length, use an extra page to be able to cover the entire range e.g. [1M..3G] which would be iterated more efficiently in a single iteration, rather than two. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index c748a1e3ba53a..dfab5b742191a 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -174,18 +174,19 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) bitmap->mapped_base_index) * sizeof(*bitmap->bitmap), PAGE_SIZE); - /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) -- GitLab From 7f31a244c753aacf40b71d01f03ca6742f81bbbc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 8 Feb 2024 17:36:28 -0500 Subject: [PATCH 0348/1418] RDMA/qedr: Fix qedr_create_user_qp error flow [ Upstream commit 5ba4e6d5863c53e937f49932dee0ecb004c65928 ] Avoid the following warning by making sure to free the allocated resources in case that qedr_init_user_queue() fail. -----------[ cut here ]----------- WARNING: CPU: 0 PID: 143192 at drivers/infiniband/core/rdma_core.c:874 uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Modules linked in: tls target_core_user uio target_core_pscsi target_core_file target_core_iblock ib_srpt ib_srp scsi_transport_srp nfsd nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs 8021q garp mrp stp llc ext4 mbcache jbd2 opa_vnic ib_umad ib_ipoib sunrpc rdma_ucm ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm hfi1 intel_rapl_msr intel_rapl_common mgag200 qedr sb_edac drm_shmem_helper rdmavt x86_pkg_temp_thermal drm_kms_helper intel_powerclamp ib_uverbs coretemp i2c_algo_bit kvm_intel dell_wmi_descriptor ipmi_ssif sparse_keymap kvm ib_core rfkill syscopyarea sysfillrect video sysimgblt irqbypass ipmi_si ipmi_devintf fb_sys_fops rapl iTCO_wdt mxm_wmi iTCO_vendor_support intel_cstate pcspkr dcdbas intel_uncore ipmi_msghandler lpc_ich acpi_power_meter mei_me mei fuse drm xfs libcrc32c qede sd_mod ahci libahci t10_pi sg crct10dif_pclmul crc32_pclmul crc32c_intel qed libata tg3 ghash_clmulni_intel megaraid_sas crc8 wmi [last unloaded: ib_srpt] CPU: 0 PID: 143192 Comm: fi_rdm_tagged_p Kdump: loaded Not tainted 5.14.0-408.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.14.0 01/25/2022 RIP: 0010:uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Code: 5d 41 5c 41 5d 41 5e e9 0f 26 1b dd 48 89 df e8 67 6a ff ff 49 8b 86 10 01 00 00 48 85 c0 74 9c 4c 89 e7 e8 83 c0 cb dd eb 92 <0f> 0b eb be 0f 0b be 04 00 00 00 48 89 df e8 8e f5 ff ff e9 6d ff RSP: 0018:ffffb7c6cadfbc60 EFLAGS: 00010286 RAX: ffff8f0889ee3f60 RBX: ffff8f088c1a5200 RCX: 00000000802a0016 RDX: 00000000802a0017 RSI: 0000000000000001 RDI: ffff8f0880042600 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8f11fffd5000 R11: 0000000000039000 R12: ffff8f0d5b36cd80 R13: ffff8f088c1a5250 R14: ffff8f1206d91000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8f11d7c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000147069200e20 CR3: 00000001c7210002 CR4: 00000000001706f0 Call Trace: ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? ib_uverbs_close+0x1f/0xb0 [ib_uverbs] ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? __warn+0x81/0x110 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? report_bug+0x10a/0x140 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ib_uverbs_close+0x1f/0xb0 [ib_uverbs] __fput+0x94/0x250 task_work_run+0x5c/0x90 do_exit+0x270/0x4a0 do_group_exit+0x2d/0x90 get_signal+0x87c/0x8c0 arch_do_signal_or_restart+0x25/0x100 ? ib_uverbs_ioctl+0xc2/0x110 [ib_uverbs] exit_to_user_mode_loop+0x9c/0x130 exit_to_user_mode_prepare+0xb6/0x100 syscall_exit_to_user_mode+0x12/0x40 do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? do_syscall_64+0x69/0x90 ? common_interrupt+0x43/0xa0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x1470abe3ec6b Code: Unable to access opcode bytes at RIP 0x1470abe3ec41. RSP: 002b:00007fff13ce9108 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: fffffffffffffffc RBX: 00007fff13ce9218 RCX: 00001470abe3ec6b RDX: 00007fff13ce9200 RSI: 00000000c0181b01 RDI: 0000000000000004 RBP: 00007fff13ce91e0 R08: 0000558d9655da10 R09: 0000558d9655dd00 R10: 00007fff13ce95c0 R11: 0000000000000246 R12: 00007fff13ce9358 R13: 0000000000000013 R14: 0000558d9655db50 R15: 00007fff13ce9470 --[ end trace 888a9b92e04c5c97 ]-- Fixes: df15856132bc ("RDMA/qedr: restructure functions that create/destroy QPs") Signed-off-by: Kamal Heib Link: https://lore.kernel.org/r/20240208223628.2040841-1-kheib@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d745ce9dc88aa..61755b5f3e20d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); -- GitLab From 5f69c475c147f24003081cc6d9a77eec97b5a64d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 19 Jan 2024 11:16:56 +0100 Subject: [PATCH 0349/1418] arm64: dts: rockchip: set num-cs property for spi on px30 [ Upstream commit 334bf0710c98d391f4067b72f535d6c4c84dfb6f ] The px30 has two spi controllers with two chip-selects each. The num-cs property is specified as the total number of chip selects a controllers has and is used since 2020 to find uses of chipselects outside that range in the Rockchip spi driver. Without the property set, the default is 1, so spi devices using the second chipselect will not be created. Fixes: eb1262e3cc8b ("spi: spi-rockchip: use num-cs property and ctlr->enable_gpiods") Signed-off-by: Heiko Stuebner Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240119101656.965744-1-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/px30.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index bfa3580429d10..61f0186447dad 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -607,6 +607,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -622,6 +623,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; -- GitLab From a9409d33af61c8c7f58540aaca8abf12c0c99589 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:07:13 +0100 Subject: [PATCH 0350/1418] RDMA/srpt: fix function pointer cast warnings [ Upstream commit eb5c7465c3240151cd42a55c7ace9da0026308a1 ] clang-16 notices that srpt_qp_event() gets called through an incompatible pointer here: drivers/infiniband/ulp/srpt/ib_srpt.c:1815:5: error: cast from 'void (*)(struct ib_event *, struct srpt_rdma_ch *)' to 'void (*)(struct ib_event *, void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1815 | = (void(*)(struct ib_event *, void*))srpt_qp_event; Change srpt_qp_event() to use the correct prototype and adjust the argument inside of it. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100728.458348-1-arnd@kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4607d37b9224a..cffa93f114a73 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -214,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1811,8 +1813,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; -- GitLab From 758b8f5e04988e95a9493fe7882075e73c76a82d Mon Sep 17 00:00:00 2001 From: Gianmarco Lusvardi Date: Tue, 13 Feb 2024 23:05:46 +0000 Subject: [PATCH 0351/1418] bpf, scripts: Correct GPL license name [ Upstream commit e37243b65d528a8a9f8b9a57a43885f8e8dfc15c ] The bpf_doc script refers to the GPL as the "GNU Privacy License". I strongly suspect that the author wanted to refer to the GNU General Public License, under which the Linux kernel is released, as, to the best of my knowledge, there is no license named "GNU Privacy License". This patch corrects the license name in the script accordingly. Fixes: 56a092c89505 ("bpf: add script and prepare bpf.h for new helpers documentation") Signed-off-by: Gianmarco Lusvardi Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240213230544.930018-3-glusvardi@posteo.net Signed-off-by: Sasha Levin --- scripts/bpf_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5c389df6045e..4de98b7bbea95 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -495,7 +495,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\ () system call, and this -- GitLab From 3c31b18a8dd8b7bf36af1cd723d455853b8f94fe Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 13 Feb 2024 10:22:00 -0600 Subject: [PATCH 0352/1418] scsi: smartpqi: Fix disable_managed_interrupts [ Upstream commit 5761eb9761d2d5fe8248a9b719efc4d8baf1f24a ] Correct blk-mq registration issue with module parameter disable_managed_interrupts enabled. When we turn off the default PCI_IRQ_AFFINITY flag, the driver needs to register with blk-mq using blk_mq_map_queues(). The driver is currently calling blk_mq_pci_map_queues() which results in a stack trace and possibly undefined behavior. Stack Trace: [ 7.860089] scsi host2: smartpqi [ 7.871934] WARNING: CPU: 0 PID: 238 at block/blk-mq-pci.c:52 blk_mq_pci_map_queues+0xca/0xd0 [ 7.889231] Modules linked in: sd_mod t10_pi sg uas smartpqi(+) crc32c_intel scsi_transport_sas usb_storage dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse [ 7.924755] CPU: 0 PID: 238 Comm: kworker/0:3 Not tainted 4.18.0-372.88.1.el8_6_smartpqi_test.x86_64 #1 [ 7.944336] Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 03/08/2022 [ 7.963026] Workqueue: events work_for_cpu_fn [ 7.978275] RIP: 0010:blk_mq_pci_map_queues+0xca/0xd0 [ 7.978278] Code: 48 89 de 89 c7 e8 f6 0f 4f 00 3b 05 c4 b7 8e 01 72 e1 5b 31 c0 5d 41 5c 41 5d 41 5e 41 5f e9 7d df 73 00 31 c0 e9 76 df 73 00 <0f> 0b eb bc 90 90 0f 1f 44 00 00 41 57 49 89 ff 41 56 41 55 41 54 [ 7.978280] RSP: 0018:ffffa95fc3707d50 EFLAGS: 00010216 [ 7.978283] RAX: 00000000ffffffff RBX: 0000000000000000 RCX: 0000000000000010 [ 7.978284] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffff9190c32d4310 [ 7.978286] RBP: 0000000000000000 R08: ffffa95fc3707d38 R09: ffff91929b81ac00 [ 7.978287] R10: 0000000000000001 R11: ffffa95fc3707ac0 R12: 0000000000000000 [ 7.978288] R13: ffff9190c32d4000 R14: 00000000ffffffff R15: ffff9190c4c950a8 [ 7.978290] FS: 0000000000000000(0000) GS:ffff9193efc00000(0000) knlGS:0000000000000000 [ 7.978292] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.172814] CR2: 000055d11166c000 CR3: 00000002dae10002 CR4: 00000000007706f0 [ 8.172816] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8.172817] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8.172818] PKRU: 55555554 [ 8.172819] Call Trace: [ 8.172823] blk_mq_alloc_tag_set+0x12e/0x310 [ 8.264339] scsi_add_host_with_dma.cold.9+0x30/0x245 [ 8.279302] pqi_ctrl_init+0xacf/0xc8e [smartpqi] [ 8.294085] ? pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.309015] pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.323286] local_pci_probe+0x42/0x80 [ 8.337855] work_for_cpu_fn+0x16/0x20 [ 8.351193] process_one_work+0x1a7/0x360 [ 8.364462] ? create_worker+0x1a0/0x1a0 [ 8.379252] worker_thread+0x1ce/0x390 [ 8.392623] ? create_worker+0x1a0/0x1a0 [ 8.406295] kthread+0x10a/0x120 [ 8.418428] ? set_kthread_struct+0x50/0x50 [ 8.431532] ret_from_fork+0x1f/0x40 [ 8.444137] ---[ end trace 1bf0173d39354506 ]--- Fixes: cf15c3e734e8 ("scsi: smartpqi: Add module param to disable managed ints") Tested-by: Yogesh Chandra Pandey Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mahesh Rajashekhara Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20240213162200.1875970-2-don.brace@microchip.com Reviewed-by: Tomas Henzl Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 47d487729635c..e44f6bb25a8ea 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6449,8 +6449,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) -- GitLab From 44148c1c82459e3227dca917f888c9bd3d73daae Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 13 Feb 2024 21:59:53 -0800 Subject: [PATCH 0353/1418] scsi: jazz_esp: Only build if SCSI core is builtin [ Upstream commit 9ddf190a7df77b77817f955fdb9c2ae9d1c9c9a3 ] JAZZ_ESP is a bool kconfig symbol that selects SCSI_SPI_ATTRS. When CONFIG_SCSI=m, this results in SCSI_SPI_ATTRS=m while JAZZ_ESP=y, which causes many undefined symbol linker errors. Fix this by only offering to build this driver when CONFIG_SCSI=y. [mkp: JAZZ_ESP is unique in that it does not support being compiled as a module unlike the remaining SPI SCSI HBA drivers] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20240214055953.9612-1-rdunlap@infradead.org Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nicolas Schier Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: linux-scsi@vger.kernel.org Cc: Geert Uytterhoeven Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402112222.Gl0udKyU-lkp@intel.com/ Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 03e71e3d5e5b3..3b990cf2c1954 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1285,7 +1285,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum -- GitLab From 2d5b4b3376fa146a23917b8577064906d643925f Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:03 +0100 Subject: [PATCH 0354/1418] net: bridge: switchdev: Skip MDB replays of deferred events on offload [ Upstream commit dc489f86257cab5056e747344f17a164f63bff4b ] Before this change, generation of the list of MDB events to replay would race against the creation of new group memberships, either from the IGMP/MLD snooping logic or from user configuration. While new memberships are immediately visible to walkers of br->mdb_list, the notification of their existence to switchdev event subscribers is deferred until a later point in time. So if a replay list was generated during a time that overlapped with such a window, it would also contain a replay of the not-yet-delivered event. The driver would thus receive two copies of what the bridge internally considered to be one single event. On destruction of the bridge, only a single membership deletion event was therefore sent. As a consequence of this, drivers which reference count memberships (at least DSA), would be left with orphan groups in their hardware database when the bridge was destroyed. This is only an issue when replaying additions. While deletion events may still be pending on the deferred queue, they will already have been removed from br->mdb_list, so no duplicates can be generated in that scenario. To a user this meant that old group memberships, from a bridge in which a port was previously attached, could be reanimated (in hardware) when the port joined a new bridge, without the new bridge's knowledge. For example, on an mv88e6xxx system, create a snooping bridge and immediately add a port to it: root@infix-06-0b-00:~$ ip link add dev br0 up type bridge mcast_snooping 1 && \ > ip link set dev x3 up master br0 And then destroy the bridge: root@infix-06-0b-00:~$ ip link del dev br0 root@infix-06-0b-00:~$ mvls atu ADDRESS FID STATE Q F 0 1 2 3 4 5 6 7 8 9 a DEV:0 Marvell 88E6393X 33:33:00:00:00:6a 1 static - - 0 . . . . . . . . . . 33:33:ff:87:e4:3f 1 static - - 0 . . . . . . . . . . ff:ff:ff:ff:ff:ff 1 static - - 0 1 2 3 4 5 6 7 8 9 a root@infix-06-0b-00:~$ The two IPv6 groups remain in the hardware database because the port (x3) is notified of the host's membership twice: once via the original event and once via a replay. Since only a single delete notification is sent, the count remains at 1 when the bridge is destroyed. Then add the same port (or another port belonging to the same hardware domain) to a new bridge, this time with snooping disabled: root@infix-06-0b-00:~$ ip link add dev br1 up type bridge mcast_snooping 0 && \ > ip link set dev x3 up master br1 All multicast, including the two IPv6 groups from br0, should now be flooded, according to the policy of br1. But instead the old memberships are still active in the hardware database, causing the switch to only forward traffic to those groups towards the CPU (port 0). Eliminate the race in two steps: 1. Grab the write-side lock of the MDB while generating the replay list. This prevents new memberships from showing up while we are generating the replay list. But it leaves the scenario in which a deferred event was already generated, but not delivered, before we grabbed the lock. Therefore: 2. Make sure that no deferred version of a replay event is already enqueued to the switchdev deferred queue, before adding it to the replay list, when replaying additions. Fixes: 4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined mdb entries") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/switchdev.h | 3 ++ net/bridge/br_switchdev.c | 74 ++++++++++++++++++++++++--------------- net/switchdev/switchdev.c | 73 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 28 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7dcdc97c0bc33..a3d8f013adcd5 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -303,6 +303,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4b3982c368b35..65567d1c8b853 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -593,21 +593,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) - return -ENOMEM; + br_switchdev_mdb_populate(&mdb, mp); - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } + + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) + return -ENOMEM; + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -675,51 +694,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. */ - rcu_read_lock(); + spin_lock_bh(&br->multicast_lock); - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8cc42aea19c7e..2e14d4c37e2dc 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include #include +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); -- GitLab From 91ac2c79e896b28a4a3a262384689ee6dfeaf083 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:04 +0100 Subject: [PATCH 0355/1418] net: bridge: switchdev: Ensure deferred event delivery on unoffload [ Upstream commit f7a70d650b0b6b0134ccba763d672c8439d9f09b ] When unoffloading a device, it is important to ensure that all relevant deferred events are delivered to it before it disassociates itself from the bridge. Before this change, this was true for the normal case when a device maps 1:1 to a net_bridge_port, i.e. br0 / swp0 When swp0 leaves br0, the call to switchdev_deferred_process() in del_nbp() makes sure to process any outstanding events while the device is still associated with the bridge. In the case when the association is indirect though, i.e. when the device is attached to the bridge via an intermediate device, like a LAG... br0 / lag0 / swp0 ...then detaching swp0 from lag0 does not cause any net_bridge_port to be deleted, so there was no guarantee that all events had been processed before the device disassociated itself from the bridge. Fix this by always synchronously processing all deferred events before signaling completion of unoffloading back to the driver. Fixes: 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_switchdev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 65567d1c8b853..b61ef2dff7a4b 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -798,6 +798,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a -- GitLab From 729bc77af438a6e67914c97f6f3d3af8f72c0131 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Feb 2024 11:13:08 -0800 Subject: [PATCH 0356/1418] dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished(). [ Upstream commit 66b60b0c8c4a163b022a9f0ad6769b0fd3dc662f ] syzkaller reported a warning [0] in inet_csk_destroy_sock() with no repro. WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); However, the syzkaller's log hinted that connect() failed just before the warning due to FAULT_INJECTION. [1] When connect() is called for an unbound socket, we search for an available ephemeral port. If a bhash bucket exists for the port, we call __inet_check_established() or __inet6_check_established() to check if the bucket is reusable. If reusable, we add the socket into ehash and set inet_sk(sk)->inet_num. Later, we look up the corresponding bhash2 bucket and try to allocate it if it does not exist. Although it rarely occurs in real use, if the allocation fails, we must revert the changes by check_established(). Otherwise, an unconnected socket could illegally occupy an ehash entry. Note that we do not put tw back into ehash because sk might have already responded to a packet for tw and it would be better to free tw earlier under such memory presure. [0]: WARNING: CPU: 0 PID: 350830 at net/ipv4/inet_connection_sock.c:1193 inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Code: 41 5c 41 5d 41 5e e9 2d 4a 3d fd e8 28 4a 3d fd 48 89 ef e8 f0 cd 7d ff 5b 5d 41 5c 41 5d 41 5e e9 13 4a 3d fd e8 0e 4a 3d fd <0f> 0b e9 61 fe ff ff e8 02 4a 3d fd 4c 89 e7 be 03 00 00 00 e8 05 RSP: 0018:ffffc9000b21fd38 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000009e78 RCX: ffffffff840bae40 RDX: ffff88806e46c600 RSI: ffffffff840bb012 RDI: ffff88811755cca8 RBP: ffff88811755c880 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000009e78 R11: 0000000000000000 R12: ffff88811755c8e0 R13: ffff88811755c892 R14: ffff88811755c918 R15: 0000000000000000 FS: 00007f03e5243800(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32f21000 CR3: 0000000112ffe001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: ? inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) dccp_close (net/dccp/proto.c:1078) inet_release (net/ipv4/af_inet.c:434) __sock_release (net/socket.c:660) sock_close (net/socket.c:1423) __fput (fs/file_table.c:377) __fput_sync (fs/file_table.c:462) __x64_sys_close (fs/open.c:1557 fs/open.c:1539 fs/open.c:1539) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e53852bb Code: 03 00 00 00 0f 05 48 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 43 c9 f5 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 c9 f5 ff 8b 44 RSP: 002b:00000000005dfba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f03e53852bb RDX: 0000000000000002 RSI: 0000000000000002 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000167c R10: 0000000008a79680 R11: 0000000000000293 R12: 00007f03e4e43000 R13: 00007f03e4e43170 R14: 00007f03e4e43178 R15: 00007f03e4e43170 [1]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 350833 Comm: syz-executor.1 Not tainted 6.7.0-12272-g2121c43f88f5 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3748) kmem_cache_alloc (mm/slub.c:3763 mm/slub.c:3842 mm/slub.c:3867) inet_bind2_bucket_create (net/ipv4/inet_hashtables.c:135) __inet_hash_connect (net/ipv4/inet_hashtables.c:1100) dccp_v4_connect (net/dccp/ipv4.c:116) __inet_stream_connect (net/ipv4/af_inet.c:676) inet_stream_connect (net/ipv4/af_inet.c:747) __sys_connect_file (net/socket.c:2048 (discriminator 2)) __sys_connect (net/socket.c:2065) __x64_sys_connect (net/socket.c:2072) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e5284e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f03e4641cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f03e5284e5d RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f03e52e5530 R15: 0000000000000000 Reported-by: syzkaller Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f2ed2aed08ab3..56776e1b1de52 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1111,10 +1111,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } -- GitLab From b06a3b1cbdfbfec85af1efda3c55cc9d8a492b08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:57:37 +0100 Subject: [PATCH 0357/1418] nouveau: fix function cast warnings [ Upstream commit 0affdba22aca5573f9d989bcb1d71d32a6a03efe ] clang-16 warns about casting between incompatible function types: drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c:161:10: error: cast from 'void (*)(const struct firmware *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 161 | .fini = (void(*)(void *))release_firmware, This one was done to use the generic shadow_fw_release() function as a callback for struct nvbios_source. Change it to use the same prototype as the other five instances, with a trivial helper function that actually calls release_firmware. Fixes: 70c0f263cc2e ("drm/nouveau/bios: pull in basic vbios subdev, more to come later") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240213095753.455062-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fca..8c2bf1c16f2a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; -- GitLab From ca4a1c00beffe9c1f090ea0a5912e00975146578 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:50 -0800 Subject: [PATCH 0358/1418] x86/numa: Fix the address overlap check in numa_fill_memblks() [ Upstream commit 9b99c17f7510bed2adbe17751fb8abddba5620bc ] numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. To do so, it first creates a list of existing memblks that overlap that address range. The issue is that it is off by one when comparing to the end of the address range, so memblks that do not overlap are selected. The impact of selecting a memblk that does not actually overlap is that an existing memblk may be filled when the expected action is to do nothing and return NUMA_NO_MEMBLK to the caller. The caller can then add a new NUMA node and memblk. Replace the broken open-coded search for address overlap with the memblock helper memblock_addrs_overlap(). Update the kernel doc and in code comments. Suggested by: "Huang, Ying" Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Mike Rapoport (IBM) Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/10a3e6109c34c21a8dd4c513cf63df63481a2b07.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- arch/x86/mm/numa.c | 19 +++++++------------ include/linux/memblock.h | 2 ++ mm/memblock.c | 5 +++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index aa39d678fe81d..e60c61b8bbc61 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -971,14 +971,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -990,17 +988,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 50ad19662a322..6790f08066b72 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783dcf1d..516efec80851a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -175,8 +175,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -- GitLab From 25bd33b87f8a337c7dbbb2887c82120ef99851e7 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:51 -0800 Subject: [PATCH 0359/1418] x86/numa: Fix the sort compare func used in numa_fill_memblks() [ Upstream commit b626070ffc14acca5b87a2aa5f581db98617584c ] The compare function used to sort memblks into starting address order fails when the result of its u64 address subtraction gets truncated to an int upon return. The impact of the bad sort is that memblks will be filled out incorrectly. Depending on the set of memblks, a user may see no errors at all but still have a bad fill, or see messages reporting a node overlap that leads to numa init failure: [] node 0 [mem: ] overlaps with node 1 [mem: ] [] No NUMA configuration found Replace with a comparison that can only result in: 1, 0, -1. Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/99dcb3ae87e04995e9f293f6158dc8fa0749a487.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e60c61b8bbc61..dae5c952735c7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -961,7 +961,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; -- GitLab From 8e29f988ad32e87fbfe263d7d22d24c6a9eaba05 Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 14 Feb 2024 12:27:17 +0300 Subject: [PATCH 0360/1418] net: stmmac: Fix incorrect dereference in interrupt handlers [ Upstream commit 97dde84026339e4b4af9a6301f825d1828d7874b ] If 'dev' or 'data' is NULL, the 'priv' variable has an incorrect address when dereferencing calling netdev_err(). Since we get as 'dev_id' or 'data' what was passed as the 'dev' argument to request_irq() during interrupt initialization (that is, the net_device and rx/tx queue pointers initialized at the time of the call) and since there are usually no checks for the 'dev_id' argument in such handlers in other drivers, remove these checks from the handlers in stmmac driver. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX") Signed-off-by: Pavel Sakharov Reviewed-by: Serge Semin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 66178ce6d000e..91b2aa81914ba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5823,11 +5823,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5843,11 +5838,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5869,11 +5859,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5900,11 +5885,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; -- GitLab From b43a4fb42fefa69b00a4bc697c59b8107d9293cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:06 +0000 Subject: [PATCH 0361/1418] ipv4: properly combine dev_base_seq and ipv4.dev_addr_genid [ Upstream commit 081a0e3b0d4c061419d3f4679dec9f68725b17e4 ] net->dev_base_seq and ipv4.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 0465277f6b3f ("ipv4: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/devinet.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 35d6e74be8406..bb0d1252cad86 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1804,6 +1804,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1855,8 +1870,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2257,8 +2271,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; -- GitLab From e5703735e57a3f1171b9b708f58850af55ef9602 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:07 +0000 Subject: [PATCH 0362/1418] ipv6: properly combine dev_base_seq and ipv6.dev_addr_genid [ Upstream commit e898e4cd1aab271ca414f9ac6e08e4c761f6913c ] net->dev_base_seq and ipv6.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 63998ac24f83 ("ipv6: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Signed-off-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b8dc20fe7a4e2..46527b5cc8f0c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -706,6 +706,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +755,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5326,7 +5341,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; -- GitLab From 9a581b17b7227bfba3f109f0452d61582b3b662e Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 16 Feb 2024 23:44:57 +0530 Subject: [PATCH 0363/1418] ata: ahci_ceva: fix error handling for Xilinx GT PHY support [ Upstream commit 26c8404e162b43dddcb037ba2d0cb58c0ed60aab ] Platform clock and phy error resources are not cleaned up in Xilinx GT PHY error path. To fix introduce the function ceva_ahci_platform_enable_resources() which is a customized version of ahci_platform_enable_resources() and inline with SATA IP programming sequence it does: - Assert SATA reset - Program PS GTR phy - Bring SATA by de-asserting the reset - Wait for GT lane PLL to be locked ceva_ahci_platform_enable_resources() is also used in the resume path as the same SATA programming sequence (as in probe) should be followed. Also cleanup the mixed usage of ahci_platform_enable_resources() and custom implementation in the probe function as both are not required. Fixes: 9a9d3abe24bb ("ata: ahci: ceva: Update the driver to support xilinx GT phy") Signed-off-by: Radhey Shyam Pandey Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci_ceva.c | 125 +++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index cb24ecf36fafe..50e07ea60e45c 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; -- GitLab From addf5e297e6cbf5341f9c07720693ca9ba0057b5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Feb 2024 13:12:17 -0800 Subject: [PATCH 0364/1418] bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel [ Upstream commit 0281b919e175bb9c3128bd3872ac2903e9436e3f ] The following race is possible between bpf_timer_cancel_and_free and bpf_timer_cancel. It will lead a UAF on the timer->timer. bpf_timer_cancel(); spin_lock(); t = timer->time; spin_unlock(); bpf_timer_cancel_and_free(); spin_lock(); t = timer->timer; timer->timer = NULL; spin_unlock(); hrtimer_cancel(&t->timer); kfree(t); /* UAF on t */ hrtimer_cancel(&t->timer); In bpf_timer_cancel_and_free, this patch frees the timer->timer after a rcu grace period. This requires a rcu_head addition to the "struct bpf_hrtimer". Another kfree(t) happens in bpf_timer_init, this does not need a kfree_rcu because it is still under the spin_lock and timer->timer has not been visible by others yet. In bpf_timer_cancel, rcu_read_lock() is added because this helper can be used in a non rcu critical section context (e.g. from a sleepable bpf prog). Other timer->timer usages in helpers.c have been audited, bpf_timer_cancel() is the only place where timer->timer is used outside of the spin_lock. Another solution considered is to mark a t->flag in bpf_timer_cancel and clear it after hrtimer_cancel() is done. In bpf_timer_cancel_and_free, it busy waits for the flag to be cleared before kfree(t). This patch goes with a straight forward solution and frees timer->timer after a rcu grace period. Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Suggested-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240215211218.990808-1-martin.lau@linux.dev Signed-off-by: Sasha Levin --- kernel/bpf/helpers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6a61a98d602cd..83f8f67e933df 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1091,6 +1091,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1312,6 +1313,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1333,6 +1335,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1387,7 +1390,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) -- GitLab From e8530b170e464017203e3b8c6c49af6e916aece1 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 19 Feb 2024 14:39:03 +0000 Subject: [PATCH 0365/1418] afs: Increase buffer size in afs_update_volume_status() [ Upstream commit 6ea38e2aeb72349cad50e38899b0ba6fbcb2af3d ] The max length of volume->vid value is 20 characters. So increase idbuf[] size up to 24 to avoid overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. [DH: Actually, it's 20 + NUL, so increase it to 24 and use snprintf()] Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Daniil Dulov Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240211150442.3416-1-d.dulov@aladdin.ru/ # v1 Link: https://lore.kernel.org/r/20240212083347.10742-1-d.dulov@aladdin.ru/ # v2 Link: https://lore.kernel.org/r/20240219143906.138346-3-dhowells@redhat.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/volume.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 1c9144e3e83ac..a146d70efa650 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -341,7 +341,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -349,7 +349,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { -- GitLab From 8391b9b651cfdf80ab0f1dc4a489f9d67386e197 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:27:17 +0300 Subject: [PATCH 0366/1418] ipv6: sr: fix possible use-after-free and null-ptr-deref [ Upstream commit 5559cea2d5aa3018a5f00dd2aca3427ba09b386b ] The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215202717.29815-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9ff..35508abd76f43 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } -- GitLab From fd84a5fae03c0a6ca66d6d7eb9f473b2c7957c21 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 02:56:03 -0700 Subject: [PATCH 0367/1418] net: dev: Convert sa_data to flexible array in struct sockaddr [ Upstream commit b5f0de6df6dce8d641ef58ef7012f3304dffb9a1 ] One of the worst offenders of "fake flexible arrays" is struct sockaddr, as it is the classic example of why GCC and Clang have been traditionally forced to treat all trailing arrays as fake flexible arrays: in the distant misty past, sa_data became too small, and code started just treating it as a flexible array, even though it was fixed-size. The special case by the compiler is specifically that sizeof(sa->sa_data) and FORTIFY_SOURCE (which uses __builtin_object_size(sa->sa_data, 1)) do not agree (14 and -1 respectively), which makes FORTIFY_SOURCE treat it as a flexible array. However, the coming -fstrict-flex-arrays compiler flag will remove these special cases so that FORTIFY_SOURCE can gain coverage over all the trailing arrays in the kernel that are _not_ supposed to be treated as a flexible array. To deal with this change, convert sa_data to a true flexible array. To keep the structure size the same, move sa_data into a union with a newly introduced sa_data_min with the original size. The result is that FORTIFY_SOURCE can continue to have no idea how large sa_data may actually be, but anything using sizeof(sa->sa_data) must switch to sizeof(sa->sa_data_min). Cc: Jens Axboe Cc: Pavel Begunkov Cc: David Ahern Cc: Dylan Yudaken Cc: Yajun Deng Cc: Petr Machata Cc: Hangbin Liu Cc: Leon Romanovsky Cc: syzbot Cc: Willem de Bruijn Cc: Pablo Neira Ayuso Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221018095503.never.671-kees@kernel.org Signed-off-by: Jakub Kicinski Stable-dep-of: a7d6027790ac ("arp: Prevent overflow in arp_req_get().") Signed-off-by: Sasha Levin --- include/linux/socket.h | 5 ++++- net/core/dev.c | 2 +- net/core/dev_ioctl.c | 2 +- net/packet/af_packet.c | 10 +++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index b3c58042bd254..d79efd0268809 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/net/core/dev.c b/net/core/dev.c index 1ba3662faf0aa..60619fe8af5fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8861,7 +8861,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7674bb9f3076c..5cdbfbf9a7dcf 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51882f07ef70c..c3117350f5fbb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3284,7 +3284,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3295,8 +3295,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3566,11 +3566,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); -- GitLab From 38c83c2488dc3726aad855c05ce91d28e968b9f3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 23:06:32 +0000 Subject: [PATCH 0368/1418] arm64/sme: Restore SME registers on exit from suspend [ Upstream commit 9533864816fb4a6207c63b7a98396351ce1a9fae ] The fields in SMCR_EL1 and SMPRI_EL1 reset to an architecturally UNKNOWN value. Since we do not otherwise manage the traps configured in this register at runtime we need to reconfigure them after a suspend in case nothing else was kind enough to preserve them for us. The vector length will be restored as part of restoring the SME state for the next SME using task. Fixes: a1f4ccd25cc2 ("arm64/sme: Provide Kconfig for SME") Reported-by: Jackson Cooper-Driver Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240213-arm64-sme-resume-v3-1-17e05e493471@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/fpsimd.c | 14 ++++++++++++++ arch/arm64/kernel/suspend.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index d720b6f7e5f9c..da18413712c04 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -343,6 +343,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -372,6 +373,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t za_state_size(struct task_struct const *task) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8c226d79abdfc..59b5a16bab5d6 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1347,6 +1347,20 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 8b02d310838f9..064d996cc55b2 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } -- GitLab From 6216509a2e11d6600aa24b92ba2735f820be4eae Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 20:23:11 -0600 Subject: [PATCH 0369/1418] platform/x86: thinkpad_acpi: Only update profile if successfully converted [ Upstream commit 427c70dec738318b7f71e1b9d829ff0e9771d493 ] Randomly a Lenovo Z13 will trigger a kernel warning traceback from this condition: ``` if (WARN_ON((profile < 0) || (profile >= ARRAY_SIZE(profile_names)))) ``` This happens because thinkpad-acpi always assumes that convert_dytc_to_profile() successfully updated the profile. On the contrary a condition can occur that when dytc_profile_refresh() is called the profile doesn't get updated as there is a -EOPNOTSUPP branch. Catch this situation and avoid updating the profile. Also log this into dynamic debugging in case any other modes should be added in the future. Fixes: c3bfcd4c6762 ("platform/x86: thinkpad_acpi: Add platform profile support") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217022311.113879-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 6edd2e294750e..c2fb19af10705 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10511,6 +10511,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10696,8 +10697,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } -- GitLab From 18580e48e624f04bcd1ed854ccc2d8f2e2225e02 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Feb 2024 18:25:14 +0530 Subject: [PATCH 0370/1418] octeontx2-af: Consider the action set by PF [ Upstream commit 3b1ae9b71c2a97f848b00fb085a2bd29bddbe8d9 ] AF reserves MCAM entries for each PF, VF present in the system and populates the entry with DMAC and action with default RSS so that basic packet I/O works. Since PF/VF is not aware of the RSS action installed by AF, AF only fixup the actions of the rules installed by PF/VF with corresponding default RSS action. This worked well for rules installed by PF/VF for features like RX VLAN offload and DMAC filters but rules involving action like drop/forward to queue are also getting modified by AF. Hence fix it by setting the default RSS action only if requested by PF/VF. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3784347b6fd88..55639c133dd02 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -437,6 +437,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); -- GitLab From 11277d18926717a3c00ef745714b16f82b8f8504 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 16 Feb 2024 20:48:14 -0400 Subject: [PATCH 0371/1418] s390: use the correct count for __iowrite64_copy() [ Upstream commit 723a2cc8d69d4342b47dfddbfe6c19f1b135f09b ] The signature for __iowrite64_copy() requires the number of 64 bit quantities, not bytes. Multiple by 8 to get to a byte length before invoking zpci_memcpy_toio() Fixes: 87bc359b9822 ("s390/pci: speed up __iowrite64_copy by using pci store block insn") Acked-by: Niklas Schnelle Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-9223d11a7662+1d7785-s390_iowrite64_jgg@nvidia.com Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2c99f9552b2f5..394c69fda399e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -241,7 +241,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) -- GitLab From 4588b13abcbd561ec67f5b3c1cb2eff690990a54 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 19 Feb 2024 00:09:33 +0900 Subject: [PATCH 0372/1418] bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready() [ Upstream commit 4cd12c6065dfcdeba10f49949bffcf383b3952d8 ] syzbot reported the following NULL pointer dereference issue [1]: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] RIP: 0010:0x0 [...] Call Trace: sk_psock_verdict_data_ready+0x232/0x340 net/core/skmsg.c:1230 unix_stream_sendmsg+0x9b4/0x1230 net/unix/af_unix.c:2293 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 If sk_psock_verdict_data_ready() and sk_psock_stop_verdict() are called concurrently, psock->saved_data_ready can be NULL, causing the above issue. This patch fixes this issue by calling the appropriate data ready function using the sk_psock_data_ready() helper and protecting it from concurrency with sk->sk_callback_lock. Fixes: 6df7f764cd3c ("bpf, sockmap: Wake up polling after data copy") Reported-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Daniel Borkmann Tested-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Acked-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=fd7b34375c1c8ce29c93 [1] Link: https://lore.kernel.org/bpf/20240218150933.6004-1-syoshida@redhat.com Signed-off-by: Sasha Levin --- net/core/skmsg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3818035ea0021..39643f78cf782 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1217,8 +1217,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } -- GitLab From ca89b4f5034d5c775956cd3830e9e7930d7053ff Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:29 +0100 Subject: [PATCH 0373/1418] tls: break out of main loop when PEEK gets a non-data record [ Upstream commit 10f41d0710fc81b7af93fa6106678d57b1ff24a7 ] PEEK needs to leave decrypted records on the rx_list so that we can receive them later on, so it jumps back into the async code that queues the skb. Unfortunately that makes us skip the TLS_RECORD_TYPE_DATA check at the bottom of the main loop, so if two records of the same (non-DATA) type are queued, we end up merging them. Add the same record type check, and make it unlikely to not penalize the async fastpath. Async decrypt only applies to data record, so this check is only needed for PEEK. process_rx_list also has similar issues. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/3df2eef4fdae720c55e69472b5bea668772b45a2.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c8cbdd02a784e..cd86c271c1348 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2137,6 +2137,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } -- GitLab From 6756168add1c6c3ef1c32c335bb843a5d1f99a75 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:30 +0100 Subject: [PATCH 0374/1418] tls: stop recv() if initial process_rx_list gave us non-DATA [ Upstream commit fdfbaec5923d9359698cbb286bc0deadbb717504 ] If we have a non-DATA record on the rx_list and another record of the same type still on the queue, we will end up merging them: - process_rx_list copies the non-DATA record - we start the loop and process the first available record since it's of the same type - we break out of the loop since the record was not DATA Just check the record type and jump to the end in case process_rx_list did some work. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/bd31449e43bd4b6ff546f5c51cf958c31c511deb.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index cd86c271c1348..20d2877bf22ad 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2044,7 +2044,7 @@ int tls_sw_recvmsg(struct sock *sk, goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); -- GitLab From bdaf6bbfc1f231bacc22d222d830b9dc817d7d23 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:31 +0100 Subject: [PATCH 0375/1418] tls: don't skip over different type records from the rx_list [ Upstream commit ec823bf3a479d42c589dc0f28ef4951c49cd2d2a ] If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA and do a recv(PEEK), the rx_list will contain the first two records. The next large recv will walk through the rx_list and copy data from record 1, then stop because record 2 is a different type. Since we haven't filled up our buffer, we will process the next available record. It's also DATA, so we can merge it with the current read. We shouldn't do that, since there was a record in between that we ignored. Add a flag to let process_rx_list inform tls_sw_recvmsg that it had more data available. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/f00c0c0afa080c60f016df1471158c1caf983c34.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 20d2877bf22ad..93e1bfa72d791 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1845,7 +1845,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1858,7 +1859,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1876,12 +1877,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1917,6 +1918,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -2020,6 +2025,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -2039,12 +2045,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2203,10 +2209,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; -- GitLab From 0c9302a6da262e6ab6a6c1d30f04a6130ed97376 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2024 16:58:04 +0100 Subject: [PATCH 0376/1418] netfilter: nf_tables: set dormant flag on hook register failure [ Upstream commit bccebf64701735533c8db37773eeacc6566cc8ec ] We need to set the dormant flag again if we fail to register the hooks. During memory pressure hook registration can fail and we end up with a table marked as active but no registered hooks. On table/base chain deletion, nf_tables will attempt to unregister the hook again which yields a warn splat from the nftables core. Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 49acb89ba9c56..f1a74b0949999 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1245,6 +1245,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } -- GitLab From 9c5662e95a8dcc232c3ef4deb21033badcd260f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 May 2023 07:35:33 +0200 Subject: [PATCH 0377/1418] netfilter: flowtable: simplify route logic [ Upstream commit fa502c86566680ac62bc28ec883a069bf7a2aa5e ] Grab reference to dst from skbuff earlier to simplify route caching. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Stable-dep-of: 9e0f0430389b ("netfilter: nft_flow_offload: reset dst in route object after setting up flow") Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 4 ++-- net/netfilter/nf_flow_table_core.c | 24 +++--------------------- net/netfilter/nft_flow_offload.c | 12 ++++++++---- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index dde4dd9c4012c..692d5955911c7 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -274,8 +274,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, flow_table->type->put(flow_table); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); +void flow_offload_route_init(struct flow_offload *flow, + const struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index c1d99cb370b44..78e4aba52b22a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow, break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: - if (!dst_hold_safe(route->tuple[dir].dst)) - return -1; - flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; @@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow, dst_release(flow->tuplehash[dir].tuple.dst_cache); } -int flow_offload_route_init(struct flow_offload *flow, +void flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route) { - int err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); - if (err < 0) - return err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); - if (err < 0) - goto err_route_reply; - + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; - - return 0; - -err_route_reply: - nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); - - return err; } EXPORT_SYMBOL_GPL(flow_offload_route_init); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 3d9f6dda5aeb2..7a8707632a815 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; } + if (!dst_hold_safe(this_dst)) + return -ENOENT; + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); - if (!other_dst) + if (!other_dst) { + dst_release(this_dst); return -ENOENT; + } nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, other_dst, !dir); @@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (flow_offload_route_init(flow, &route) < 0) - goto err_flow_add; + flow_offload_route_init(flow, &route); if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; @@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; - dst_release(route.tuple[!dir].dst); return; err_flow_add: flow_offload_free(flow); err_flow_alloc: + dst_release(route.tuple[dir].dst); dst_release(route.tuple[!dir].dst); err_flow_route: clear_bit(IPS_OFFLOAD_BIT, &ct->status); -- GitLab From 012df10717da02367aaf92c65f9c89db206c15f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Feb 2024 12:32:58 +0100 Subject: [PATCH 0378/1418] netfilter: nft_flow_offload: reset dst in route object after setting up flow [ Upstream commit 9e0f0430389be7696396c62f037be4bf72cf93e3 ] dst is transferred to the flow object, route object does not own it anymore. Reset dst in route object, otherwise if flow_offload_add() fails, error path releases dst twice, leading to a refcount underflow. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 692d5955911c7..4a767b3d20b9d 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -275,7 +275,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 78e4aba52b22a..2036c7a27075b 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -146,7 +156,7 @@ static void nft_flow_dst_release(struct flow_offload *flow, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) + struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); -- GitLab From a6cafdb49a7bbf4a88367db209703eee6941e023 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Feb 2024 21:36:39 +0100 Subject: [PATCH 0379/1418] netfilter: nft_flow_offload: release dst in case direct xmit path is used [ Upstream commit 8762785f459be1cfe6fcf7285c123aad6a3703f0 ] Direct xmit does not use it since it calls dev_queue_xmit() to send packets, hence it calls dst_release(). kmemleak reports: unreferenced object 0xffff88814f440900 (size 184): comm "softirq", pid 0, jiffies 4294951896 hex dump (first 32 bytes): 00 60 5b 04 81 88 ff ff 00 e6 e8 82 ff ff ff ff .`[............. 21 0b 50 82 ff ff ff ff 00 00 00 00 00 00 00 00 !.P............. backtrace (crc cb2bf5d6): [<000000003ee17107>] kmem_cache_alloc+0x286/0x340 [<0000000021a5de2c>] dst_alloc+0x43/0xb0 [<00000000f0671159>] rt_dst_alloc+0x2e/0x190 [<00000000fe5092c9>] __mkroute_output+0x244/0x980 [<000000005fb96fb0>] ip_route_output_flow+0xc0/0x160 [<0000000045367433>] nf_ip_route+0xf/0x30 [<0000000085da1d8e>] nf_route+0x2d/0x60 [<00000000d1ecd1cb>] nft_flow_route+0x171/0x6a0 [nft_flow_offload] [<00000000d9b2fb60>] nft_flow_offload_eval+0x4e8/0x700 [nft_flow_offload] [<000000009f447dbb>] expr_call_ops_eval+0x53/0x330 [nf_tables] [<00000000072e1be6>] nft_do_chain+0x17c/0x840 [nf_tables] [<00000000d0551029>] nft_do_chain_inet+0xa1/0x210 [nf_tables] [<0000000097c9d5c6>] nf_hook_slow+0x5b/0x160 [<0000000005eccab1>] ip_forward+0x8b6/0x9b0 [<00000000553a269b>] ip_rcv+0x221/0x230 [<00000000412872e5>] __netif_receive_skb_one_core+0xfe/0x110 Fixes: fa502c865666 ("netfilter: flowtable: simplify route logic") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 2036c7a27075b..99195cf6b2657 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: -- GitLab From 26994a04b0ba50388600a617afe7baaa239cad25 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 21 Apr 2023 00:34:30 +0200 Subject: [PATCH 0380/1418] netfilter: nf_tables: rename function to destroy hook list [ Upstream commit cdc32546632354305afdcf399a5431138a31c9e0 ] Rename nft_flowtable_hooks_destroy() by nft_hooks_destroy() to prepare for netdev chain device updates. Signed-off-by: Pablo Neira Ayuso Stable-dep-of: d472e9853d7b ("netfilter: nf_tables: register hooks last when adding new chain/flowtable") Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f1a74b0949999..c7b543d1a0516 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7938,7 +7938,7 @@ err_unregister_net_hooks: return err; } -static void nft_flowtable_hooks_destroy(struct list_head *hook_list) +static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; @@ -8123,7 +8123,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, &flowtable->hook_list, flowtable); if (err < 0) { - nft_flowtable_hooks_destroy(&flowtable->hook_list); + nft_hooks_destroy(&flowtable->hook_list); goto err4; } @@ -8893,7 +8893,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; @@ -9850,7 +9850,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; -- GitLab From f305359186724ac4bc058d5cd01782e6e6f9a3e7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Feb 2024 19:43:53 +0100 Subject: [PATCH 0381/1418] netfilter: nf_tables: register hooks last when adding new chain/flowtable [ Upstream commit d472e9853d7b46a6b094224d131d09ccd3a03daf ] Register hooks last when adding chain/flowtable to ensure that packets do not walk over datastructure that is being released in the error path without waiting for the rcu grace period. Fixes: 91c7b38dc9f0 ("netfilter: nf_tables: use new transaction infrastructure to handle chain") Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 78 ++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c7b543d1a0516..a29313e0aaa4d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -686,15 +686,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -703,22 +704,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -2459,19 +2460,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2479,17 +2476,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -8031,9 +8033,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8113,34 +8115,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], &flowtable_hook, flowtable, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); -- GitLab From ea33b816691255d1d5eeb7f5a02f2acf6a556393 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Feb 2024 18:38:45 +0100 Subject: [PATCH 0382/1418] netfilter: nf_tables: use kzalloc for hook allocation [ Upstream commit 195e5f88c2e48330ba5483e0bad2de3b3fad484f ] KMSAN reports unitialized variable when registering the hook, reg->hook_ops_type == NF_HOOK_OP_BPF) ~~~~~~~~~~~ undefined This is a small structure, just use kzalloc to make sure this won't happen again when new fields get added to nf_hook_ops. Fixes: 7b4b2fa37587 ("netfilter: annotate nf_tables base hook ops") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a29313e0aaa4d..e21ec3ad80939 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2059,7 +2059,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; -- GitLab From c22ad76cfc43a2c7923de1a9acc0d05b9fa2c63c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 15 Feb 2024 15:53:08 +0800 Subject: [PATCH 0383/1418] net: mctp: put sock on tag allocation failure [ Upstream commit 9990889be14288d4f1743e4768222d5032a79c27 ] We may hold an extra reference on a socket if a tag allocation fails: we optimistically allocate the sk_key, and take a ref there, but do not drop if we end up not using the allocated key. Ensure we're dropping the sock on this failure by doing a proper unref rather than directly kfree()ing. Fixes: de8a6b15d965 ("net: mctp: add an explicit reference from a mctp_sk_key to sock") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ce9b61e44d1cdae7797be0c5e3141baf582d23a0.1707983487.git.jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mctp/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 68be8f2b622dd..256bf0b89e6ca 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } -- GitLab From 1623161f80a4b8bb284b0cc3118daf3ccd5bf923 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 19 Feb 2024 09:00:43 +0100 Subject: [PATCH 0384/1418] net: sparx5: Add spinlock for frame transmission from CPU [ Upstream commit 603ead96582d85903baec2d55f021b8dac5c25d2 ] Both registers used when doing manual injection or fdma injection are shared between all the net devices of the switch. It was noticed that when having two process which each of them trying to inject frames on different ethernet ports, that the HW started to behave strange, by sending out more frames then expected. When doing fdma injection it is required to set the frame in the DCB and then make sure that the next pointer of the last DCB is invalid. But because there is no locks for this, then easily this pointer between the DCB can be broken and then it would create a loop of DCBs. And that means that the HW will continuously transmit these frames in a loop. Until the SW will break this loop. Therefore to fix this issue, add a spin lock for when accessing the registers for manual or fdma injection. Signed-off-by: Horatiu Vultur Reviewed-by: Daniel Machon Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Link: https://lore.kernel.org/r/20240219080043.1561014-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 3423c95cc84ae..7031f41287e09 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -744,6 +744,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 7a83222caa737..cb3173d2b0e8d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -278,6 +278,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc2..ac7e1cffbcecf 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; -- GitLab From f556a352fdb2489d50f1231f58b6dfbadb4e3756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:13 +0200 Subject: [PATCH 0385/1418] phonet: take correct lock to peek at the RX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3b2d9bc4d4acdf15a876eae2c0d83149250e85ba ] The receive queue is protected by its embedded spin-lock, not the socket lock, so we need the former lock here (and only that one). Fixes: 107d0d9b8d9a ("Phonet: Phonet datagram transport protocol") Reported-by: Luosili Signed-off-by: Rémi Denis-Courmont Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240218081214.4806-1-remi@remlab.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/phonet/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index ff5f49ab236ed..39a6c5713d0b2 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -35,10 +35,10 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); answ = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(answ, (int __user *)arg); case SIOCPNADDRESOURCE: -- GitLab From 9d5523e065b568e79dfaa2ea1085a5bcf74baf78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:14 +0200 Subject: [PATCH 0386/1418] phonet/pep: fix racy skb_queue_empty() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d2a894d7f487dcb894df023e9d3014cf5b93fe5 ] The receive queues are protected by their respective spin-lock, not the socket lock. This could lead to skb_peek() unexpectedly returning NULL or a pointer to an already dequeued socket buffer. Fixes: 9641458d3ec4 ("Phonet: Pipe End Point for Phonet Pipes protocol") Signed-off-by: Rémi Denis-Courmont Link: https://lore.kernel.org/r/20240218081214.4806-2-remi@remlab.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/phonet/pep.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 83ea13a50690b..607f54c23647a 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct pep_sock *pn = pep_sk(sk); @@ -930,15 +961,7 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - answ = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - answ = skb_peek(&sk->sk_receive_queue)->len; - else - answ = 0; - release_sock(sk); + answ = pep_first_packet_length(sk); ret = put_user(answ, (int __user *)arg); break; -- GitLab From 37919ef31d7c9967ce299af0151bd7d22d7e53f7 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 19 Feb 2024 14:52:54 +0100 Subject: [PATCH 0387/1418] Fix write to cloned skb in ipv6_hop_ioam() [ Upstream commit f198d933c2e4f8f89e0620fbaf1ea7eac384a0eb ] ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam(). It also updates ip6_parse_tlv() to refresh the network header pointer ("nh") when returning from ipv6_hop_ioam(). Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Reported-by: Paolo Abeni Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/exthdrs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5fa0e37305d9d..1cfdd9d950123 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -180,6 +180,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -974,6 +976,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: -- GitLab From b9196289e36cc520ad9fa2d2f4b1c54b06f364a4 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 20 Feb 2024 12:30:07 +0530 Subject: [PATCH 0388/1418] net: phy: realtek: Fix rtl8211f_config_init() for RTL8211F(D)(I)-VD-CG PHY [ Upstream commit 3489182b11d35f1944c1245fc9c4867cf622c50f ] Commit bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") extended support of the driver from the existing support for RTL8211F(D)(I)-CG PHY to the newer RTL8211F(D)(I)-VD-CG PHY. While that commit indicated that the RTL8211F_PHYCR2 register is not supported by the "VD-CG" PHY model and therefore updated the corresponding section in rtl8211f_config_init() to be invoked conditionally, the call to "genphy_soft_reset()" was left as-is, when it should have also been invoked conditionally. This is because the call to "genphy_soft_reset()" was first introduced by the commit 0a4355c2b7f8 ("net: phy: realtek: add dt property to disable CLKOUT clock") since the RTL8211F guide indicates that a PHY reset should be issued after setting bits in the PHYCR2 register. As the PHYCR2 register is not applicable to the "VD-CG" PHY model, fix the rtl8211f_config_init() function by invoking "genphy_soft_reset()" conditionally based on the presence of the "PHYCR2" register. Fixes: bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") Signed-off-by: Siddharth Vadapalli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220070007.968762-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 3d99fd6664d7a..70e52d27064ec 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -414,9 +414,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_resume(struct phy_device *phydev) -- GitLab From fd7b4f4fdc7cc00fb5e6812b67decdde33b3786e Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Fri, 19 Jan 2024 08:32:06 -0800 Subject: [PATCH 0389/1418] drm/syncobj: call drm_syncobj_fence_add_wait when WAIT_AVAILABLE flag is set [ Upstream commit 3c43177ffb54ea5be97505eb8e2690e99ac96bc9 ] When waiting for a syncobj timeline point whose fence has not yet been submitted with the WAIT_FOR_SUBMIT flag, a callback is registered using drm_syncobj_fence_add_wait and the thread is put to sleep until the timeout expires. If the fence is submitted before then, drm_syncobj_add_point will wake up the sleeping thread immediately which will proceed to wait for the fence to be signaled. However, if the WAIT_AVAILABLE flag is used instead, drm_syncobj_fence_add_wait won't get called, meaning the waiting thread will always sleep for the full timeout duration, even if the fence gets submitted earlier. If it turns out that the fence *has* been submitted by the time it eventually wakes up, it will still indicate to userspace that the wait completed successfully (it won't return -ETIME), but it will have taken much longer than it should have. To fix this, we must call drm_syncobj_fence_add_wait if *either* the WAIT_FOR_SUBMIT flag or the WAIT_AVAILABLE flag is set. The only difference being that with WAIT_FOR_SUBMIT we will also wait for the fence to be signaled after it has been submitted while with WAIT_AVAILABLE we will return immediately. IGT test patch: https://lists.freedesktop.org/archives/igt-dev/2024-January/067537.html v1 -> v2: adjust lockdep_assert_none_held_once condition (cherry picked from commit 8c44ea81634a4a337df70a32621a5f3791be23df) Fixes: 01d6c3578379 ("drm/syncobj: add support for timeline point wait v8") Signed-off-by: Erik Kurzinger Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Reviewed-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20240119163208.3723457-1-ekurzinger@nvidia.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_syncobj.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index da0145bc104a8..8f2737075dc2f 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -980,7 +980,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1049,7 +1050,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } -- GitLab From 58168005337eabef345a872be3f87d0215ff3b30 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 13 Feb 2024 01:50:50 +0100 Subject: [PATCH 0390/1418] drm/amd/display: Fix memory leak in dm_sw_fini() [ Upstream commit bae67893578d608e35691dcdfa90c4957debf1d3 ] After destroying dmub_srv, the memory associated with it is not freed, causing a memory leak: unreferenced object 0xffff896302b45800 (size 1024): comm "(udev-worker)", pid 222, jiffies 4294894636 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 6265fd77): [] kmalloc_trace+0x29d/0x340 [] dm_dmub_sw_init+0xb4/0x450 [amdgpu] [] dm_sw_init+0x15/0x2b0 [amdgpu] [] amdgpu_device_init+0x1417/0x24e0 [amdgpu] [] amdgpu_driver_load_kms+0x15/0x190 [amdgpu] [] amdgpu_pci_probe+0x187/0x4e0 [amdgpu] [] local_pci_probe+0x3e/0x90 [] pci_device_probe+0xc3/0x230 [] really_probe+0xe2/0x480 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1f/0x90 [] __driver_attach+0xce/0x1c0 [] bus_for_each_dev+0x70/0xc0 [] bus_add_driver+0x112/0x210 [] driver_register+0x55/0x100 [] do_one_initcall+0x41/0x300 Fix this by freeing dmub_srv after destroying it. Fixes: 743b9786b14a ("drm/amd/display: Hook up the DMUB service in DM") Signed-off-by: Armin Wolf Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a826c92933199..da16048bf1004 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2255,6 +2255,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } -- GitLab From b1301f15ddc26d53f7e77e76c8c7236ce4bc2a18 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 21 Feb 2024 20:27:13 +0100 Subject: [PATCH 0391/1418] i2c: imx: when being a target, mark the last read as processed [ Upstream commit 87aec499368d488c20292952d6d4be7cb9e49c5e ] When being a target, NAK from the controller means that all bytes have been transferred. So, the last byte needs also to be marked as 'processed'. Otherwise index registers of backends may not increase. Fixes: f7414cd6923f ("i2c: imx: support slave mode for imx I2C driver") Signed-off-by: Corey Minyard Tested-by: Andrew Manley Reviewed-by: Andrew Manley Reviewed-by: Oleksij Rempel [wsa: fixed comment and commit message to properly describe the case] Signed-off-by: Wolfram Sang Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-imx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index fc70920c4ddab..0c203c614197c 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -804,6 +804,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } -- GitLab From 54407d9bc5e32e51d4e8488d2cfbaa976e9127fb Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 22 Oct 2023 21:09:57 +0800 Subject: [PATCH 0392/1418] erofs: simplify compression configuration parser commit efb4fb02cef3ab410b603c8f0e1c67f61d55f542 upstream. Move erofs_load_compr_cfgs() into decompressor.c as well as introduce a callback instead of a hard-coded switch for each algorithm for simplicity. Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20231022130957.11398-1-xiang@kernel.org Stable-dep-of: 118a8cf504d7 ("erofs: fix inconsistent per-file compression format") Signed-off-by: Yue Hu Signed-off-by: Greg Kroah-Hartman --- fs/erofs/compress.h | 4 ++ fs/erofs/decompressor.c | 60 ++++++++++++++++++++++++++++-- fs/erofs/decompressor_lzma.c | 4 +- fs/erofs/internal.h | 28 ++------------ fs/erofs/super.c | 72 +++++------------------------------- 5 files changed, 76 insertions(+), 92 deletions(-) diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 26fa170090b8f..c4a3187bdb8fc 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -21,6 +21,8 @@ struct z_erofs_decompress_req { }; struct z_erofs_decompressor { + int (*config)(struct super_block *sb, struct erofs_super_block *dsb, + void *data, int size); int (*decompress)(struct z_erofs_decompress_req *rq, struct page **pagepool); char *name; @@ -93,6 +95,8 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); /* prototypes for specific algorithms */ +int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size); int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); #endif diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 0cfad74374ca9..ae3cfd018d99c 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -24,11 +24,11 @@ struct z_erofs_lz4_decompress_ctx { unsigned int oend; }; -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int size) +static int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size) { struct erofs_sb_info *sbi = EROFS_SB(sb); + struct z_erofs_lz4_cfgs *lz4 = data; u16 distance; if (lz4) { @@ -374,17 +374,71 @@ static struct z_erofs_decompressor decompressors[] = { .name = "interlaced" }, [Z_EROFS_COMPRESSION_LZ4] = { + .config = z_erofs_load_lz4_config, .decompress = z_erofs_lz4_decompress, .name = "lz4" }, #ifdef CONFIG_EROFS_FS_ZIP_LZMA [Z_EROFS_COMPRESSION_LZMA] = { + .config = z_erofs_load_lzma_config, .decompress = z_erofs_lzma_decompress, .name = "lzma" }, #endif }; +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + unsigned int algs, alg; + erofs_off_t offset; + int size, ret = 0; + + if (!erofs_sb_has_compr_cfgs(sbi)) { + sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4; + return z_erofs_load_lz4_config(sb, dsb, NULL, 0); + } + + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { + erofs_err(sb, "unidentified algorithms %x, please upgrade kernel", + sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); + return -EOPNOTSUPP; + } + + offset = EROFS_SUPER_OFFSET + sbi->sb_size; + alg = 0; + for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + void *data; + + if (!(algs & 1)) + continue; + + data = erofs_read_metadata(sb, &buf, &offset, &size); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + break; + } + + if (alg >= ARRAY_SIZE(decompressors) || + !decompressors[alg].config) { + erofs_err(sb, "algorithm %d isn't enabled on this kernel", + alg); + ret = -EOPNOTSUPP; + } else { + ret = decompressors[alg].config(sb, + dsb, data, size); + } + + kfree(data); + if (ret) + break; + } + erofs_put_metabuf(&buf); + return ret; +} + int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 49addc345aebe..970464c4b6769 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -72,10 +72,10 @@ int z_erofs_lzma_init(void) } int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) + struct erofs_super_block *dsb, void *data, int size) { static DEFINE_MUTEX(lzma_resize_mutex); + struct z_erofs_lzma_cfgs *lzma = data; unsigned int dict_size, i; struct z_erofs_lzma *strm, *head = NULL; int err; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d8d09fc3ed655..79a7a5815ea63 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -471,6 +471,8 @@ struct erofs_map_dev { /* data.c */ extern const struct file_operations erofs_file_fops; +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp); void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, struct inode *inode, @@ -565,9 +567,7 @@ void z_erofs_exit_zip_subsystem(void); int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); int erofs_try_to_free_cached_page(struct page *page); -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len); +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} @@ -575,36 +575,14 @@ static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} -static inline int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len) -{ - if (lz4 || dsb->u1.lz4_max_distance) { - erofs_err(sb, "lz4 algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA int z_erofs_lzma_init(void); void z_erofs_lzma_exit(void); -int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size); #else static inline int z_erofs_lzma_init(void) { return 0; } static inline int z_erofs_lzma_exit(void) { return 0; } -static inline int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) { - if (lzma) { - erofs_err(sb, "lzma algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ /* flags for erofs_fscache_register_cookie() */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index bd8bf8fc2f5df..f2647126cb2fb 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -126,8 +126,8 @@ static bool check_layout_compatibility(struct super_block *sb, #ifdef CONFIG_EROFS_FS_ZIP /* read variable-sized metadata, offset will be aligned by 4-byte */ -static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, - erofs_off_t *offset, int *lengthp) +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp) { u8 *buffer, *ptr; int len, i, cnt; @@ -159,64 +159,15 @@ static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, } return buffer; } - -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - unsigned int algs, alg; - erofs_off_t offset; - int size, ret = 0; - - sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); - if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { - erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", - sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); - return -EINVAL; - } - - offset = EROFS_SUPER_OFFSET + sbi->sb_size; - alg = 0; - for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { - void *data; - - if (!(algs & 1)) - continue; - - data = erofs_read_metadata(sb, &buf, &offset, &size); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - - switch (alg) { - case Z_EROFS_COMPRESSION_LZ4: - ret = z_erofs_load_lz4_config(sb, dsb, data, size); - break; - case Z_EROFS_COMPRESSION_LZMA: - ret = z_erofs_load_lzma_config(sb, dsb, data, size); - break; - default: - DBG_BUGON(1); - ret = -EFAULT; - } - kfree(data); - if (ret) - break; - } - erofs_put_metabuf(&buf); - return ret; -} #else -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) +static int z_erofs_parse_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) { - if (dsb->u1.available_compr_algs) { - erofs_err(sb, "try to load compressed fs when compression is disabled"); - return -EINVAL; - } - return 0; + if (!dsb->u1.available_compr_algs) + return 0; + + erofs_err(sb, "compression disabled, unable to mount compressed EROFS"); + return -EOPNOTSUPP; } #endif @@ -398,10 +349,7 @@ static int erofs_read_superblock(struct super_block *sb) } /* parse on-disk compression configurations */ - if (erofs_sb_has_compr_cfgs(sbi)) - ret = erofs_load_compr_cfgs(sb, dsb); - else - ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); + ret = z_erofs_parse_cfgs(sb, dsb); if (ret < 0) goto out; -- GitLab From 47467e04816cb297905c0f09bc2d11ef865942d9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 13 Jan 2024 23:06:02 +0800 Subject: [PATCH 0393/1418] erofs: fix inconsistent per-file compression format commit 118a8cf504d7dfa519562d000f423ee3ca75d2c4 upstream. EROFS can select compression algorithms on a per-file basis, and each per-file compression algorithm needs to be marked in the on-disk superblock for initialization. However, syzkaller can generate inconsistent crafted images that use an unsupported algorithmtype for specific inodes, e.g. use MicroLZMA algorithmtype even it's not set in `sbi->available_compr_algs`. This can lead to an unexpected "BUG: kernel NULL pointer dereference" if the corresponding decompressor isn't built-in. Fix this by checking against `sbi->available_compr_algs` for each m_algorithmformat request. Incorrect !erofs_sb_has_compr_cfgs preset bitmap is now fixed together since it was harmless previously. Reported-by: Fixes: 8f89926290c4 ("erofs: get compression algorithms directly on mapping") Fixes: 622ceaddb764 ("erofs: lzma compression support") Reviewed-by: Yue Hu Link: https://lore.kernel.org/r/20240113150602.1471050-1-hsiangkao@linux.alibaba.com Signed-off-by: Gao Xiang Signed-off-by: Gao Xiang Signed-off-by: Yue Hu Signed-off-by: Greg Kroah-Hartman --- fs/erofs/decompressor.c | 2 +- fs/erofs/zmap.c | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index ae3cfd018d99c..1eefa4411e066 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -396,7 +396,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) int size, ret = 0; if (!erofs_sb_has_compr_cfgs(sbi)) { - sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4; + sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4; return z_erofs_load_lz4_config(sb, dsb, NULL, 0); } diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0337b70b2dac4..abcded1acd194 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -610,7 +610,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, .map = map, }; int err = 0; - unsigned int lclusterbits, endoff; + unsigned int lclusterbits, endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; @@ -700,17 +700,20 @@ static int z_erofs_do_map_blocks(struct inode *inode, err = -EFSCORRUPTED; goto unmap_out; } - if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) - map->m_algorithmformat = - Z_EROFS_COMPRESSION_INTERLACED; - else - map->m_algorithmformat = - Z_EROFS_COMPRESSION_SHIFTED; - } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { - map->m_algorithmformat = vi->z_algorithmtype[1]; + afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? + Z_EROFS_COMPRESSION_INTERLACED : + Z_EROFS_COMPRESSION_SHIFTED; } else { - map->m_algorithmformat = vi->z_algorithmtype[0]; + afmt = m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 ? + vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; + if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { + erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + afmt, vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } } + map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && -- GitLab From 7ebeee513f8f2abdfe98632afdba0b8ebe44339b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:24 -0800 Subject: [PATCH 0394/1418] mm/damon/reclaim: fix quota stauts loss due to online tunings commit 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 upstream. Patch series "mm/damon: fix quota status loss due to online tunings". DAMON_RECLAIM and DAMON_LRU_SORT is not preserving internal quota status when applying new user parameters, and hence could cause temporal quota accuracy degradation. Fix it by preserving the status. This patch (of 2): For online parameters change, DAMON_RECLAIM creates new scheme based on latest values of the parameters and replaces the old scheme with the new one. When creating it, the internal status of the quota of the old scheme is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240216194025.9207-2-sj@kernel.org Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/reclaim.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 162c9b1ca00fd..cc337e94acfda 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -141,9 +141,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; int err = 0; err = damon_set_attrs(ctx, &damon_reclaim_mon_attrs); @@ -154,6 +165,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } damon_set_schemes(ctx, &scheme, 1); return damon_set_region_biggest_system_ram_default(target, -- GitLab From 18f614369def2a11a52f569fe0f910b199d13487 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:38 -0800 Subject: [PATCH 0395/1418] fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio commit b820de741ae48ccf50dd95e297889c286ff4f760 upstream. If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the following kernel warning appears: WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 Call trace: kiocb_set_cancel_fn+0x9c/0xa8 ffs_epfile_read_iter+0x144/0x1d0 io_read+0x19c/0x498 io_issue_sqe+0x118/0x27c io_submit_sqes+0x25c/0x5fc __arm64_sys_io_uring_enter+0x104/0xab0 invoke_syscall+0x58/0x11c el0_svc_common+0xb4/0xf4 do_el0_svc+0x2c/0xb0 el0_svc+0x2c/0xa4 el0t_64_sync_handler+0x68/0xb4 el0t_64_sync+0x1a4/0x1a8 Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is submitted by libaio. Suggested-by: Jens Axboe Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org Signed-off-by: Christian Brauner Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index e85ba0b77f596..849c3e3ed558b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -595,6 +595,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) struct kioctx *ctx = req->ki_ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; @@ -1476,7 +1483,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a1911dcf834b..67313881f8ac1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -337,6 +337,8 @@ enum rw_hint { #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) struct kiocb { struct file *ki_filp; -- GitLab From 14f1992430ef9e647b02aa8ca12c5bcb9a1dffea Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 25 Jan 2024 08:51:27 +0000 Subject: [PATCH 0396/1418] mm: zswap: fix missing folio cleanup in writeback race path commit e3b63e966cac0bf78aaa1efede1827a252815a1d upstream. In zswap_writeback_entry(), after we get a folio from __read_swap_cache_async(), we grab the tree lock again to check that the swap entry was not invalidated and recycled. If it was, we delete the folio we just added to the swap cache and exit. However, __read_swap_cache_async() returns the folio locked when it is newly allocated, which is always true for this path, and the folio is ref'd. Make sure to unlock and put the folio before returning. This was discovered by code inspection, probably because this path handles a race condition that should not happen often, and the bug would not crash the system, it will only strand the folio indefinitely. Link: https://lkml.kernel.org/r/20240125085127.1327013-1-yosryahmed@google.com Fixes: 04fc7816089c ("mm: fix zswap writeback race condition") Signed-off-by: Yosry Ahmed Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Cc: Domenico Cerasuolo Cc: Signed-off-by: Andrew Morton Signed-off-by: Yosry Ahmed Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index b3829ada4a413..b7cb126797f9e 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1013,6 +1013,8 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); + unlock_page(page); + put_page(page); ret = -ENOMEM; goto fail; } -- GitLab From b03bca85617b95965a1af5722e452775a1ab8f32 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:04 -0700 Subject: [PATCH 0397/1418] mptcp: userspace pm send RM_ADDR for ID 0 commit 84c531f54ad9a124a924c9505d74e33d16965146 upstream. This patch adds the ability to send RM_ADDR for local ID 0. Check whether id 0 address is removed, if not, put id 0 into a removing list, pass it to mptcp_pm_remove_addr() to remove id 0 address. There is no reason not to allow the userspace to remove the initial address (ID 0). This special case was not taken into account not letting the userspace to delete all addresses as announced. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/379 Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-3-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2895be3046f79..631fa104617c3 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -222,6 +222,40 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (subflow->local_id == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -253,6 +287,11 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) goto remove_err; } + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto remove_err; + } + lock_sock((struct sock *)msk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { -- GitLab From 70a4a26572013738ee9b4d0e18487ef22f97be7c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:29 +0100 Subject: [PATCH 0398/1418] mptcp: add needs_id for netlink appending addr commit 584f3894262634596532cf43a5e782e34a0ce374 upstream. Just the same as userspace PM, a new parameter needs_id is added for in-kernel PM mptcp_pm_nl_append_new_local_addr() too. Add a new helper mptcp_pm_has_addr_attr_id() to check whether an address ID is set from PM or not. In mptcp_pm_nl_get_local_id(), needs_id is always true, but in mptcp_pm_nl_add_addr_doit(), pass mptcp_pm_has_addr_attr_id() to needs_it. Fixes: efd5a4c04e18 ("mptcp: add the address ID assignment bitmap") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 980050f6b456f..70a1025f093cf 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -900,7 +900,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -942,7 +943,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -953,7 +954,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1095,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1311,6 +1312,18 @@ next: return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_addr_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1352,7 +1365,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); goto out_free; -- GitLab From 4a37c6c068345ca2e4001e47555f5031872361d2 Mon Sep 17 00:00:00 2001 From: Szuying Chen Date: Thu, 21 Sep 2023 17:33:51 +0800 Subject: [PATCH 0399/1418] ata: ahci: add identifiers for ASM2116 series adapters commit 3bf6141060948e27b62b13beb216887f2e54591e upstream. Add support for PCIe SATA adapter cards based on Asmedia 2116 controllers. These cards can provide up to 10 SATA ports on PCIe card. Signed-off-by: Szuying Chen Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 42c6b660550c2..6da5005c2e268 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -613,6 +613,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ + { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ + { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ + { PCI_VDEVICE(ASMEDIA, 0x1165), board_ahci }, /* ASM1165 */ + { PCI_VDEVICE(ASMEDIA, 0x1166), board_ahci }, /* ASM1166 */ /* * Samsung SSDs found on some macbooks. NCQ times out if MSI is -- GitLab From f8fc1f1d9f65b89e7b90ffea5dfd58d427e48017 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 30 Jan 2024 15:21:51 +0200 Subject: [PATCH 0400/1418] ahci: Extend ASM1061 43-bit DMA address quirk to other ASM106x parts commit 51af8f255bdaca6d501afc0d085b808f67b44d91 upstream. ASMedia have confirmed that all ASM106x parts currently listed in ahci_pci_tbl[] suffer from the 43-bit DMA address limitation that we ran into on the ASM1061, and therefore, we need to apply the quirk added by commit 20730e9b2778 ("ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers") to the other supported ASM106x parts as well. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-ide/ZbopwKZJAKQRA4Xv@x1-carbon/ Signed-off-by: Lennert Buytenhek [cassel: add link to ASMedia confirmation email] Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6da5005c2e268..1790a2ecb9fac 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -606,13 +606,13 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* ASMedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ -- GitLab From f119f2325ba70cbfdec701000dcad4d88805d5b0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Feb 2024 15:05:16 -0800 Subject: [PATCH 0401/1418] arp: Prevent overflow in arp_req_get(). commit a7d6027790acea24446ddd6632d394096c0f4667 upstream. syzkaller reported an overflown write in arp_req_get(). [0] When ioctl(SIOCGARP) is issued, arp_req_get() looks up an neighbour entry and copies neigh->ha to struct arpreq.arp_ha.sa_data. The arp_ha here is struct sockaddr, not struct sockaddr_storage, so the sa_data buffer is just 14 bytes. In the splat below, 2 bytes are overflown to the next int field, arp_flags. We initialise the field just after the memcpy(), so it's not a problem. However, when dev->addr_len is greater than 22 (e.g. MAX_ADDR_LEN), arp_netmask is overwritten, which could be set as htonl(0xFFFFFFFFUL) in arp_ioctl() before calling arp_req_get(). To avoid the overflow, let's limit the max length of memcpy(). Note that commit b5f0de6df6dc ("net: dev: Convert sa_data to flexible array in struct sockaddr") just silenced syzkaller. [0]: memcpy: detected field-spanning write (size 16) of single field "r->arp_ha.sa_data" at net/ipv4/arp.c:1128 (size 14) WARNING: CPU: 0 PID: 144638 at net/ipv4/arp.c:1128 arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Modules linked in: CPU: 0 PID: 144638 Comm: syz-executor.4 Not tainted 6.1.74 #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 RIP: 0010:arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Code: fd ff ff e8 41 42 de fb b9 0e 00 00 00 4c 89 fe 48 c7 c2 20 6d ab 87 48 c7 c7 80 6d ab 87 c6 05 25 af 72 04 01 e8 5f 8d ad fb <0f> 0b e9 6c fd ff ff e8 13 42 de fb be 03 00 00 00 4c 89 e7 e8 a6 RSP: 0018:ffffc900050b7998 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88803a815000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8641a44a RDI: 0000000000000001 RBP: ffffc900050b7a98 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 203a7970636d656d R12: ffff888039c54000 R13: 1ffff92000a16f37 R14: ffff88803a815084 R15: 0000000000000010 FS: 00007f172bf306c0(0000) GS:ffff88805aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f172b3569f0 CR3: 0000000057f12005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: arp_ioctl+0x33f/0x4b0 net/ipv4/arp.c:1261 inet_ioctl+0x314/0x3a0 net/ipv4/af_inet.c:981 sock_do_ioctl+0xdf/0x260 net/socket.c:1204 sock_ioctl+0x3ef/0x650 net/socket.c:1321 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x18e/0x220 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x7f172b262b8d Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f172bf300b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f172b3abf80 RCX: 00007f172b262b8d RDX: 0000000020000000 RSI: 0000000000008954 RDI: 0000000000000003 RBP: 00007f172b2d3493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f172b3abf80 R15: 00007f172bf10000 Reported-by: syzkaller Reported-by: Bjoern Doebel Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240215230516.31330-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5d..ccff96820a703 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; -- GitLab From 277439e7cabd9d4c6334b39a4b99d49b4c97265b Mon Sep 17 00:00:00 2001 From: Edward Lo Date: Thu, 27 Oct 2022 23:33:37 +0800 Subject: [PATCH 0402/1418] fs/ntfs3: Enhance the attribute size check commit 4f082a7531223a438c757bb20e304f4c941c67a8 upstream. This combines the overflow and boundary check so that all attribute size will be properly examined while enumerating them. [ 169.181521] BUG: KASAN: slab-out-of-bounds in run_unpack+0x2e3/0x570 [ 169.183161] Read of size 1 at addr ffff8880094b6240 by task mount/247 [ 169.184046] [ 169.184925] CPU: 0 PID: 247 Comm: mount Not tainted 6.0.0-rc7+ #3 [ 169.185908] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 169.187066] Call Trace: [ 169.187492] [ 169.188049] dump_stack_lvl+0x49/0x63 [ 169.188495] print_report.cold+0xf5/0x689 [ 169.188964] ? run_unpack+0x2e3/0x570 [ 169.189331] kasan_report+0xa7/0x130 [ 169.189714] ? run_unpack+0x2e3/0x570 [ 169.190079] __asan_load1+0x51/0x60 [ 169.190634] run_unpack+0x2e3/0x570 [ 169.191290] ? run_pack+0x840/0x840 [ 169.191569] ? run_lookup_entry+0xb3/0x1f0 [ 169.192443] ? mi_enum_attr+0x20a/0x230 [ 169.192886] run_unpack_ex+0xad/0x3e0 [ 169.193276] ? run_unpack+0x570/0x570 [ 169.193557] ? ni_load_mi+0x80/0x80 [ 169.193889] ? debug_smp_processor_id+0x17/0x20 [ 169.194236] ? mi_init+0x4a/0x70 [ 169.194496] attr_load_runs_vcn+0x166/0x1c0 [ 169.194851] ? attr_data_write_resident+0x250/0x250 [ 169.195188] mi_read+0x133/0x2c0 [ 169.195481] ntfs_iget5+0x277/0x1780 [ 169.196017] ? call_rcu+0x1c7/0x330 [ 169.196392] ? ntfs_get_block_bmap+0x70/0x70 [ 169.196708] ? evict+0x223/0x280 [ 169.197014] ? __kmalloc+0x33/0x540 [ 169.197305] ? wnd_init+0x15b/0x1b0 [ 169.197599] ntfs_fill_super+0x1026/0x1ba0 [ 169.197994] ? put_ntfs+0x1d0/0x1d0 [ 169.198299] ? vsprintf+0x20/0x20 [ 169.198583] ? mutex_unlock+0x81/0xd0 [ 169.198930] ? set_blocksize+0x95/0x150 [ 169.199269] get_tree_bdev+0x232/0x370 [ 169.199750] ? put_ntfs+0x1d0/0x1d0 [ 169.200094] ntfs_fs_get_tree+0x15/0x20 [ 169.200431] vfs_get_tree+0x4c/0x130 [ 169.200714] path_mount+0x654/0xfe0 [ 169.201067] ? putname+0x80/0xa0 [ 169.201358] ? finish_automount+0x2e0/0x2e0 [ 169.201965] ? putname+0x80/0xa0 [ 169.202445] ? kmem_cache_free+0x1c4/0x440 [ 169.203075] ? putname+0x80/0xa0 [ 169.203414] do_mount+0xd6/0xf0 [ 169.203719] ? path_mount+0xfe0/0xfe0 [ 169.203977] ? __kasan_check_write+0x14/0x20 [ 169.204382] __x64_sys_mount+0xca/0x110 [ 169.204711] do_syscall_64+0x3b/0x90 [ 169.205059] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 169.205571] RIP: 0033:0x7f67a80e948a [ 169.206327] Code: 48 8b 0d 11 fa 2a 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 008 [ 169.208296] RSP: 002b:00007ffddf020f58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 [ 169.209253] RAX: ffffffffffffffda RBX: 000055e2547a6060 RCX: 00007f67a80e948a [ 169.209777] RDX: 000055e2547a6260 RSI: 000055e2547a62e0 RDI: 000055e2547aeaf0 [ 169.210342] RBP: 0000000000000000 R08: 000055e2547a6280 R09: 0000000000000020 [ 169.210843] R10: 00000000c0ed0000 R11: 0000000000000202 R12: 000055e2547aeaf0 [ 169.211307] R13: 000055e2547a6260 R14: 0000000000000000 R15: 00000000ffffffff [ 169.211913] [ 169.212304] [ 169.212680] Allocated by task 0: [ 169.212963] (stack is not available) [ 169.213200] [ 169.213472] The buggy address belongs to the object at ffff8880094b5e00 [ 169.213472] which belongs to the cache UDP of size 1152 [ 169.214095] The buggy address is located 1088 bytes inside of [ 169.214095] 1152-byte region [ffff8880094b5e00, ffff8880094b6280) [ 169.214639] [ 169.215004] The buggy address belongs to the physical page: [ 169.215766] page:000000002e324c8c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x94b4 [ 169.218412] head:000000002e324c8c order:2 compound_mapcount:0 compound_pincount:0 [ 169.219078] flags: 0xfffffc0010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) [ 169.220272] raw: 000fffffc0010200 0000000000000000 dead000000000122 ffff888002409b40 [ 169.221006] raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 [ 169.222320] page dumped because: kasan: bad access detected [ 169.222922] [ 169.223119] Memory state around the buggy address: [ 169.224056] ffff8880094b6100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.224908] ffff8880094b6180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.225677] >ffff8880094b6200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.226445] ^ [ 169.227055] ffff8880094b6280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.227638] ffff8880094b6300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Signed-off-by: Edward Lo Signed-off-by: Konstantin Komarov Cc: "Doebel, Bjoern" Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/record.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ab03c373cec66..a8d4ed7bca025 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -226,11 +226,6 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* overflow check */ - return NULL; - } - attr = Add2Ptr(attr, asize); off += asize; } @@ -253,8 +248,8 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if ((t32 & 0xf) || (t32 > 0x100)) return NULL; - /* Check boundary. */ - if (off + asize > used) + /* Check overflow and boundary. */ + if (off + asize < off || off + asize > used) return NULL; /* Check size of attribute. */ -- GitLab From a3eb3a74aa8c94e6c8130b55f3b031f29162868c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Mar 2024 13:26:39 +0100 Subject: [PATCH 0403/1418] Linux 6.1.80 Link: https://lore.kernel.org/r/20240227131610.391465389@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Florian Fainelli Tested-by: Kelsey Steele Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Yann Sionneau Tested-by: Conor Dooley Tested-by: Shuah Khan Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: kernelci.org bot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d6bc9f597e8b8..bc4adb561a7cf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 79 +SUBLEVEL = 80 EXTRAVERSION = NAME = Curry Ramen -- GitLab From b7be6c737a179a76901c872f6b4c1d00552d9a1b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:22:18 +0200 Subject: [PATCH 0404/1418] netfilter: nf_tables: disallow timeout for anonymous sets commit e26d3009efda338f19016df4175f354a9bd0a4ab upstream. Never used from userspace, disallow these parameters. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e21ec3ad80939..d3ba947f43761 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4752,6 +4752,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4760,6 +4763,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } -- GitLab From ae5f10ed9539878f1128f3fa129f104ba97ffc86 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 30 May 2023 09:38:09 +0200 Subject: [PATCH 0405/1418] drm/meson: fix unbind path if HDMI fails to bind [ Upstream commit 6a044642988b5f8285f3173b8e88784bef2bc306 ] If the case the HDMI controller fails to bind, we try to unbind all components before calling drm_dev_put() which makes drm_bridge_detach() crash because unbinding the HDMI controller frees the bridge memory. The solution is the unbind all components at the end like in the remove path. Reviewed-by: Nicolas Belin Tested-by: Nicolas Belin Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230512-amlogic-v6-4-upstream-dsi-ccf-vim3-v5-8-56eb7a4d5b8e@linaro.org Stable-dep-of: bd915ae73a2d ("drm/meson: Don't remove bridges which are created by other drivers") Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_drv.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 119544d88b586..fbac39aa38cc4 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) goto exit_afbcd; if (has_components) { - ret = component_bind_all(drm->dev, drm); + ret = component_bind_all(dev, drm); if (ret) { dev_err(drm->dev, "Couldn't bind all components\n"); + /* Do not try to unbind */ + has_components = false; goto exit_afbcd; } } ret = meson_encoder_hdmi_init(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_plane_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_overlay_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_crtc_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto unbind_all; + goto exit_afbcd; drm_mode_config_reset(drm); @@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); -unbind_all: - if (has_components) - component_unbind_all(drm->dev, drm); exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); free_drm: drm_dev_put(drm); + meson_encoder_hdmi_remove(priv); + meson_encoder_cvbs_remove(priv); + + if (has_components) + component_unbind_all(dev, drm); + return ret; } -- GitLab From 7d34b1078665e171f4883b8675e52d17ebfc5c64 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 15 Feb 2024 23:04:42 +0100 Subject: [PATCH 0406/1418] drm/meson: Don't remove bridges which are created by other drivers [ Upstream commit bd915ae73a2d78559b376ad2caf5e4ef51de2455 ] Stop calling drm_bridge_remove() for bridges allocated/managed by other drivers in the remove paths of meson_encoder_{cvbs,dsi,hdmi}. drm_bridge_remove() unregisters the bridge so it cannot be used anymore. Doing so for bridges we don't own can lead to the video pipeline not being able to come up after -EPROBE_DEFER of the VPU because we're unregistering a bridge that's managed by another driver. The other driver doesn't know that we have unregistered it's bridge and on subsequent .probe() we're not able to find those bridges anymore (since nobody re-creates them). This fixes probe errors on Meson8b boards with the CVBS outputs enabled. Fixes: 09847723c12f ("drm/meson: remove drm bridges at aggregate driver unbind time") Fixes: 42dcf15f901c ("drm/meson: add DSI encoder") Cc: Reported-by: Steve Morvai Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Tested-by: Steve Morvai Link: https://lore.kernel.org/r/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_encoder_cvbs.c | 1 - drivers/gpu/drm/meson/meson_encoder_hdmi.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e3..3407450435e20 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index b14e6e507c61b..03062e7a02b64 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } -- GitLab From cf33e6ca12d814e1be2263cb76960d0019d7fb94 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 29 Dec 2022 13:01:40 -0600 Subject: [PATCH 0407/1418] scsi: core: Add struct for args to execution functions [ Upstream commit d0949565811f0896c1c7e781ab2ad99d34273fdf ] Move the SCSI execution functions to use a struct for passing in optional args. This commit adds the new struct, temporarily converts scsi_execute() and scsi_execute_req() ands a new helper, scsi_execute_cmd(), which takes the scsi_exec_args struct. There should be no change in behavior. We no longer allow users to pass in any request->rq_flags value, but they were only passing in RQF_PM which we do support by allowing users to pass in the BLK_MQ_REQ flags used by blk_mq_alloc_request(). Subsequent commits will convert scsi_execute() and scsi_execute_req() users to the new helpers then remove scsi_execute() and scsi_execute_req(). Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Stable-dep-of: 321da3dc1f3c ("scsi: sd: usb_storage: uas: Access media prior to querying device properties") Signed-off-by: Sasha Levin --- drivers/scsi/scsi_lib.c | 52 ++++++++++++++++++-------------------- include/scsi/scsi_device.h | 51 +++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5c5954b78585e..edd296f950a33 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } - /** - * __scsi_execute - insert request and wait for the result - * @sdev: scsi device + * scsi_execute_cmd - insert request and wait for the result + * @sdev: scsi_device * @cmd: scsi command - * @data_direction: data direction + * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer - * @sense: optional sense buffer - * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @flags: flags for ->cmd_flags - * @rq_flags: flags for ->rq_flags - * @resid: optional residual length + * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid) +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args) { + static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - req = scsi_alloc_request(sdev->request_queue, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); + if (!args) + args = &default_args; + else if (WARN_ON_ONCE(args->sense && + args->sense_len != SCSI_SENSE_BUFFERSIZE)) + return -EINVAL; + + req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = retries; req->timeout = timeout; - req->cmd_flags |= flags; - req->rq_flags |= rq_flags | RQF_QUIET; + req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (resid) - *resid = scmd->resid_len; - if (sense && scmd->sense_len) - memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (sshdr) + if (args->resid) + *args->resid = scmd->resid_len; + if (args->sense) + memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - sshdr); + args->sshdr); + ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(__scsi_execute); +EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d2751ed536df2..b407807cc6695 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -479,28 +479,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); -extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid); + +/* Optional arguments to scsi_execute_cmd */ +struct scsi_exec_args { + unsigned char *sense; /* sense buffer */ + unsigned int sense_len; /* sense buffer len */ + struct scsi_sense_hdr *sshdr; /* decoded sense header */ + blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ + int *resid; /* residual length */ +}; + +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args); + /* Make sure any sense buffer is the correct size. */ -#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ - sshdr, timeout, retries, flags, rq_flags, resid) \ +#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ + _sshdr, _timeout, _retries, _flags, _rq_flags, \ + _resid) \ ({ \ - BUILD_BUG_ON((sense) != NULL && \ - sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ - __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ - sense, sshdr, timeout, retries, flags, rq_flags, \ - resid); \ + scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ + REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ + _buffer, _bufflen, _timeout, _retries, \ + &(struct scsi_exec_args) { \ + .sense = _sense, \ + .sshdr = _sshdr, \ + .req_flags = _rq_flags & RQF_PM ? \ + BLK_MQ_REQ_PM : 0, \ + .resid = _resid, \ + }); \ }) + static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute(sdev, cmd, data_direction, buffer, - bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); + return scsi_execute_cmd(sdev, cmd, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, + bufflen, timeout, retries, + &(struct scsi_exec_args) { + .sshdr = sshdr, + .resid = resid, + }); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); -- GitLab From b73dd5f9997279715cd450ee8ca599aaff2eabb9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH 0408/1418] scsi: sd: usb_storage: uas: Access media prior to querying device properties [ Upstream commit 321da3dc1f3c92a12e3c5da934090d2992a8814c ] It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- drivers/usb/storage/scsiglue.c | 7 +++++++ drivers/usb/storage/uas.c | 7 +++++++ include/scsi/scsi_device.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 31b5273f43a71..4433b02c8935f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da536..12cf9940e5b67 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index de3836412bf32..ed22053b3252f 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b407807cc6695..a64713fe52640 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- GitLab From 2a3d40b4025fcfe51b04924979f1653993b17669 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 9 Feb 2024 14:04:53 -0800 Subject: [PATCH 0409/1418] af_unix: Fix task hung while purging oob_skb in GC. [ Upstream commit 25236c91b5ab4a26a56ba2e79b8060cf4e047839 ] syzbot reported a task hung; at the same time, GC was looping infinitely in list_for_each_entry_safe() for OOB skb. [0] syzbot demonstrated that the list_for_each_entry_safe() was not actually safe in this case. A single skb could have references for multiple sockets. If we free such a skb in the list_for_each_entry_safe(), the current and next sockets could be unlinked in a single iteration. unix_notinflight() uses list_del_init() to unlink the socket, so the prefetched next socket forms a loop itself and list_for_each_entry_safe() never stops. Here, we must use while() and make sure we always fetch the first socket. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 5065 Comm: syz-executor236 Not tainted 6.8.0-rc3-syzkaller-00136-g1f719a2f3fa6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:preempt_count arch/x86/include/asm/preempt.h:26 [inline] RIP: 0010:check_kcov_mode kernel/kcov.c:173 [inline] RIP: 0010:__sanitizer_cov_trace_pc+0xd/0x60 kernel/kcov.c:207 Code: cc cc cc cc 66 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 65 48 8b 14 25 40 c2 03 00 <65> 8b 05 b4 7c 78 7e a9 00 01 ff 00 48 8b 34 24 74 0f f6 c4 01 74 RSP: 0018:ffffc900033efa58 EFLAGS: 00000283 RAX: ffff88807b077800 RBX: ffff88807b077800 RCX: 1ffffffff27b1189 RDX: ffff88802a5a3b80 RSI: ffffffff8968488d RDI: ffff88807b077f70 RBP: ffffc900033efbb0 R08: 0000000000000001 R09: fffffbfff27a900c R10: ffffffff93d48067 R11: ffffffff8ae000eb R12: ffff88807b077800 R13: dffffc0000000000 R14: ffff88807b077e40 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564f4fc1e3a8 CR3: 000000000d57a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unix_gc+0x563/0x13b0 net/unix/garbage.c:319 unix_release_sock+0xa93/0xf80 net/unix/af_unix.c:683 unix_release+0x91/0xf0 net/unix/af_unix.c:1064 __sock_release+0xb0/0x270 net/socket.c:659 sock_close+0x1c/0x30 net/socket.c:1421 __fput+0x270/0xb80 fs/file_table.c:376 task_work_run+0x14f/0x250 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa8a/0x2ad0 kernel/exit.c:871 do_group_exit+0xd4/0x2a0 kernel/exit.c:1020 __do_sys_exit_group kernel/exit.c:1031 [inline] __se_sys_exit_group kernel/exit.c:1029 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1029 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd5/0x270 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f9d6cbdac09 Code: Unable to access opcode bytes at 0x7f9d6cbdabdf. RSP: 002b:00007fff5952feb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9d6cbdac09 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 RBP: 00007f9d6cc552b0 R08: ffffffffffffffb8 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 00007f9d6cc552b0 R13: 0000000000000000 R14: 00007f9d6cc55d00 R15: 00007f9d6cbabe70 Reported-by: syzbot+4fa4a2d1f5a5ee06f006@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4fa4a2d1f5a5ee06f006 Fixes: 1279f9d9dec2 ("af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240209220453.96053-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/garbage.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 767b338a7a2d4..9e1bab97c05ba 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -315,10 +315,11 @@ void unix_gc(void) __skb_queue_purge(&hitlist); #if IS_ENABLED(CONFIG_AF_UNIX_OOB) - list_for_each_entry_safe(u, next, &gc_candidates, link) { - struct sk_buff *skb = u->oob_skb; + while (!list_empty(&gc_candidates)) { + u = list_entry(gc_candidates.next, struct unix_sock, link); + if (u->oob_skb) { + struct sk_buff *skb = u->oob_skb; - if (skb) { u->oob_skb = NULL; kfree_skb(skb); } -- GitLab From a76072bc73c77cbdc6c77e5893376939894e6f73 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Sep 2023 11:35:39 +0200 Subject: [PATCH 0410/1418] of: overlay: Reorder struct fragment fields kerneldoc [ Upstream commit 5d007ffdf6025fe83e497c44ed7c8aa8f150c4d1 ] The fields of the fragment structure were reordered, but the kerneldoc was not updated. Fixes: 81225ea682f45629 ("of: overlay: reorder fields in struct fragment") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/cfa36d2bb95e3c399c415dbf58057302c70ef375.1695893695.git.geert+renesas@glider.be Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 4402871b5c0c0..e663d5585a057 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -45,8 +45,8 @@ struct target { /** * struct fragment - info about fragment nodes in overlay expanded device tree - * @target: target of the overlay operation * @overlay: pointer to the __overlay__ node + * @target: target of the overlay operation */ struct fragment { struct device_node *overlay; -- GitLab From 00459ae532d6f1e7c720b5a331f40f72cf158dca Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 16:10:51 -0600 Subject: [PATCH 0411/1418] net: restore alpha order to Ethernet devices in config [ Upstream commit a1331535aeb41b08fe0c2c78af51885edc93615b ] The filename "wangxun" sorts between "intel" and "xscale", but xscale/Kconfig contains "Intel XScale" prompts, so Wangxun ends up in the wrong place in the config front-ends. Move wangxun/Kconfig so the Wangxun devices appear in order in the user interface. Fixes: 3ce7547e5b71 ("net: txgbe: Add build support for txgbe") Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230307221051.890135-1-helgaas@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 1917da7841919..5a274b99f2992 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" -source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig" source "drivers/net/ethernet/tundra/Kconfig" source "drivers/net/ethernet/vertexcom/Kconfig" source "drivers/net/ethernet/via/Kconfig" +source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/wiznet/Kconfig" source "drivers/net/ethernet/xilinx/Kconfig" source "drivers/net/ethernet/xircom/Kconfig" -- GitLab From 174ac6b53a20cc7f466eead68ccee55ab633e5a1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 6 Feb 2023 16:39:20 +0100 Subject: [PATCH 0412/1418] mlxsw: spectrum_acl_tcam: Make fini symmetric to init [ Upstream commit 61fe3b9102ac84ba479ab84d8f5454af2e21e468 ] Move mutex_destroy() to the end to make the function symmetric with mlxsw_sp_acl_tcam_init(). No functional changes. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index dc2e204bcd727..2107de4e9d99b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -86,10 +86,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - mutex_destroy(&tcam->lock); ops->fini(mlxsw_sp, tcam->priv); bitmap_free(tcam->used_groups); bitmap_free(tcam->used_regions); + mutex_destroy(&tcam->lock); } int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, -- GitLab From 5dbedec7e5cf668caa0d76e02915eef16d22e97f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 6 Feb 2023 16:39:19 +0100 Subject: [PATCH 0413/1418] mlxsw: spectrum_acl_tcam: Add missing mutex_destroy() [ Upstream commit 65823e07b1e4055b6278725fd92f4d7e6f8d53fd ] Pair mutex_init() with a mutex_destroy() in the error path. Found during code review. No functional changes. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 2107de4e9d99b..41eac7dfb67e7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, max_regions = max_tcam_regions; tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) - return -ENOMEM; + if (!tcam->used_regions) { + err = -ENOMEM; + goto err_alloc_used_regions; + } tcam->max_regions = max_regions; max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); @@ -78,6 +80,8 @@ err_tcam_init: bitmap_free(tcam->used_groups); err_alloc_used_groups: bitmap_free(tcam->used_regions); +err_alloc_used_regions: + mutex_destroy(&tcam->lock); return err; } -- GitLab From e30f82597bf64ad32f3b9718bb12791bf3926f3d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 15 May 2023 11:10:49 -0400 Subject: [PATCH 0414/1418] PCI: layerscape: Add the endpoint linkup notifier support [ Upstream commit 061cbfab09fb35898f2907d42f936cf9ae271d93 ] Layerscape has PME interrupt, which can be used as linkup notifier. Set CFG_READY bit of PEX_PF0_CONFIG to enable accesses from root complex when linkup detected. Link: https://lore.kernel.org/r/20230515151049.2797105-1-Frank.Li@nxp.com Signed-off-by: Xiaowei Bao Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- .../pci/controller/dwc/pci-layerscape-ep.c | 100 +++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index ad99707b3b994..5b27554e071a1 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -18,6 +18,20 @@ #include "pcie-designware.h" +#define PEX_PF0_CONFIG 0xC0014 +#define PEX_PF0_CFG_READY BIT(0) + +/* PEX PFa PCIE PME and message interrupt registers*/ +#define PEX_PF0_PME_MES_DR 0xC0020 +#define PEX_PF0_PME_MES_DR_LUD BIT(7) +#define PEX_PF0_PME_MES_DR_LDD BIT(9) +#define PEX_PF0_PME_MES_DR_HRD BIT(10) + +#define PEX_PF0_PME_MES_IER 0xC0028 +#define PEX_PF0_PME_MES_IER_LUDIE BIT(7) +#define PEX_PF0_PME_MES_IER_LDDIE BIT(9) +#define PEX_PF0_PME_MES_IER_HRDIE BIT(10) + #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) struct ls_pcie_ep_drvdata { @@ -30,8 +44,84 @@ struct ls_pcie_ep { struct dw_pcie *pci; struct pci_epc_features *ls_epc; const struct ls_pcie_ep_drvdata *drvdata; + int irq; + bool big_endian; }; +static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + return ioread32be(pci->dbi_base + offset); + else + return ioread32(pci->dbi_base + offset); +} + +static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + iowrite32be(value, pci->dbi_base + offset); + else + iowrite32(value, pci->dbi_base + offset); +} + +static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) +{ + struct ls_pcie_ep *pcie = dev_id; + struct dw_pcie *pci = pcie->pci; + u32 val, cfg; + + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); + ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); + + if (!val) + return IRQ_NONE; + + if (val & PEX_PF0_PME_MES_DR_LUD) { + cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); + cfg |= PEX_PF0_CFG_READY; + ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); + dw_pcie_ep_linkup(&pci->ep); + + dev_dbg(pci->dev, "Link up\n"); + } else if (val & PEX_PF0_PME_MES_DR_LDD) { + dev_dbg(pci->dev, "Link down\n"); + } else if (val & PEX_PF0_PME_MES_DR_HRD) { + dev_dbg(pci->dev, "Hot reset\n"); + } + + return IRQ_HANDLED; +} + +static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie, + struct platform_device *pdev) +{ + u32 val; + int ret; + + pcie->irq = platform_get_irq_byname(pdev, "pme"); + if (pcie->irq < 0) + return pcie->irq; + + ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler, + IRQF_SHARED, pdev->name, pcie); + if (ret) { + dev_err(&pdev->dev, "Can't register PCIe IRQ\n"); + return ret; + } + + /* Enable interrupts */ + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER); + val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE | + PEX_PF0_PME_MES_IER_LUDIE; + ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val); + + return 0; +} + static const struct pci_epc_features* ls_pcie_ep_get_features(struct dw_pcie_ep *ep) { @@ -124,6 +214,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) struct ls_pcie_ep *pcie; struct pci_epc_features *ls_epc; struct resource *dbi_base; + int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -143,6 +234,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ops = pcie->drvdata->dw_pcie_ops; ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4); + ls_epc->linkup_notifier = true; pcie->pci = pci; pcie->ls_epc = ls_epc; @@ -154,9 +246,15 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ep.ops = &ls_pcie_ep_ops; + pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian"); + platform_set_drvdata(pdev, pcie); - return dw_pcie_ep_init(&pci->ep); + ret = dw_pcie_ep_init(&pci->ep); + if (ret) + return ret; + + return ls_pcie_ep_interrupt_init(pcie, pdev); } static struct platform_driver ls_pcie_ep_driver = { -- GitLab From 507eeaad4d32174640440f225a30112d8cccd374 Mon Sep 17 00:00:00 2001 From: Xiaowei Bao Date: Thu, 20 Jul 2023 09:58:34 -0400 Subject: [PATCH 0415/1418] PCI: layerscape: Add workaround for lost link capabilities during reset [ Upstream commit 17cf8661ee0f065c08152e611a568dd1fb0285f1 ] The endpoint controller loses the Maximum Link Width and Supported Link Speed value from the Link Capabilities Register - initially configured by the Reset Configuration Word (RCW) - during a link-down or hot reset event. Address this issue in the endpoint event handler. Link: https://lore.kernel.org/r/20230720135834.1977616-2-Frank.Li@nxp.com Fixes: a805770d8a22 ("PCI: layerscape: Add EP mode support") Signed-off-by: Xiaowei Bao Signed-off-by: Hou Zhiqiang Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Acked-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- .../pci/controller/dwc/pci-layerscape-ep.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index 5b27554e071a1..dd7d74fecc48e 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -45,6 +45,7 @@ struct ls_pcie_ep { struct pci_epc_features *ls_epc; const struct ls_pcie_ep_drvdata *drvdata; int irq; + u32 lnkcap; bool big_endian; }; @@ -73,6 +74,7 @@ static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) struct ls_pcie_ep *pcie = dev_id; struct dw_pcie *pci = pcie->pci; u32 val, cfg; + u8 offset; val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); @@ -81,6 +83,19 @@ static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) return IRQ_NONE; if (val & PEX_PF0_PME_MES_DR_LUD) { + + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + + /* + * The values of the Maximum Link Width and Supported Link + * Speed from the Link Capabilities Register will be lost + * during link down or hot reset. Restore initial value + * that configured by the Reset Configuration Word (RCW). + */ + dw_pcie_dbi_ro_wr_en(pci); + dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap); + dw_pcie_dbi_ro_wr_dis(pci); + cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); cfg |= PEX_PF0_CFG_READY; ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); @@ -214,6 +229,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) struct ls_pcie_ep *pcie; struct pci_epc_features *ls_epc; struct resource *dbi_base; + u8 offset; int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); @@ -250,6 +266,9 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pcie); + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP); + ret = dw_pcie_ep_init(&pci->ep); if (ret) return ret; -- GitLab From 0cea0c330a11461d0fbad5347a5d68d499db56fd Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 14 Apr 2023 11:19:46 +0200 Subject: [PATCH 0416/1418] ARM: dts: imx: Adjust dma-apbh node name [ Upstream commit e9f5cd85f1f931bb7b64031492f7051187ccaac7 ] Currently the dtbs_check generates warnings like this: $nodename:0: 'dma-apbh@110000' does not match '^dma-controller(@.*)?$' So fix all affected dma-apbh node names. Signed-off-by: Stefan Wahren Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx23.dtsi | 2 +- arch/arm/boot/dts/imx28.dtsi | 2 +- arch/arm/boot/dts/imx6qdl.dtsi | 2 +- arch/arm/boot/dts/imx6sx.dtsi | 2 +- arch/arm/boot/dts/imx6ul.dtsi | 2 +- arch/arm/boot/dts/imx7s.dtsi | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index ec476b1596496..b236d23f80715 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -59,7 +59,7 @@ reg = <0x80000000 0x2000>; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx23-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <0 14 20 0 diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index b15df16ecb01a..b81592a613112 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -78,7 +78,7 @@ status = "disabled"; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx28-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <82 83 84 85 diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index ff1e0173b39be..2c6eada01d792 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -150,7 +150,7 @@ interrupt-parent = <&gpc>; ranges; - dma_apbh: dma-apbh@110000 { + dma_apbh: dma-controller@110000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x00110000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 1f1053a898fbf..67d344ae76b51 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -209,7 +209,7 @@ power-domains = <&pd_pu>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = , diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 2b5996395701a..aac081b6daaac 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -164,7 +164,7 @@ <0x00a06000 0x2000>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 4b23630fc738d..2940dacaa56fc 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1267,7 +1267,7 @@ }; }; - dma_apbh: dma-apbh@33000000 { + dma_apbh: dma-controller@33000000 { compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x33000000 0x2000>; interrupts = , -- GitLab From 49e734926a4b07308d98dc9d3c8f05eb77f1da00 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 17 Dec 2022 02:08:53 +0100 Subject: [PATCH 0417/1418] ARM: dts: imx7s: Drop dma-apb interrupt-names [ Upstream commit 9928f0a9e7c0cee3360ca1442b4001d34ad67556 ] Drop "interrupt-names" property, since it is broken. The drivers/dma/mxs-dma.c in Linux kernel does not use it, the property contains duplicate array entries in existing DTs, and even malformed entries (gmpi, should have been gpmi). Get rid of that optional property altogether. Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7s.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 2940dacaa56fc..69aebc691526f 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1274,7 +1274,6 @@ , , ; - interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3"; #dma-cells = <1>; dma-channels = <4>; clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; -- GitLab From ed9fdc82cafbcf8a46b55d315219bf9464621bca Mon Sep 17 00:00:00 2001 From: Elson Roy Serrao Date: Fri, 24 Mar 2023 14:47:57 -0700 Subject: [PATCH 0418/1418] usb: gadget: Properly configure the device for remote wakeup [ Upstream commit b93c2a68f3d9dc98ec30dcb342ae47c1c8d09d18 ] The wakeup bit in the bmAttributes field indicates whether the device is configured for remote wakeup. But this field should be allowed to set only if the UDC supports such wakeup mechanism. So configure this field based on UDC capability. Also inform the UDC whether the device is configured for remote wakeup by implementing a gadget op. Reviewed-by: Thinh Nguyen Signed-off-by: Elson Roy Serrao Link: https://lore.kernel.org/r/1679694482-16430-2-git-send-email-quic_eserrao@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/composite.c | 18 ++++++++++++++++++ drivers/usb/gadget/configfs.c | 3 +++ drivers/usb/gadget/udc/core.c | 27 +++++++++++++++++++++++++++ drivers/usb/gadget/udc/trace.h | 5 +++++ include/linux/usb/composite.h | 2 ++ include/linux/usb/gadget.h | 8 ++++++++ 6 files changed, 63 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index cb0a4e2cdbb73..247cca46cdfae 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, return min(val, 900U) / 8; } +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c) +{ + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) { + /* Reset the rw bit if gadget is not capable of it */ + if (!g->wakeup_capable && g->ops->set_remote_wakeup) { + WARN(c->cdev, "Clearing wakeup bit for config c.%d\n", + c->bConfigurationValue); + c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP; + } + } +} + static int config_buf(struct usb_configuration *config, enum usb_device_speed speed, void *buf, u8 type) { @@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev, power = min(power, 500U); else power = min(power, 900U); + + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) + usb_gadget_set_remote_wakeup(gadget, 1); + else + usb_gadget_set_remote_wakeup(gadget, 0); done: if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) usb_gadget_set_selfpowered(gadget); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 4dcf29577f8f1..b94aec6227c51 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1376,6 +1376,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget, if (gadget_is_otg(gadget)) c->descriptors = otg_desc; + /* Properly configure the bmAttributes wakeup bit */ + check_remote_wakeup_config(gadget, c); + cfg = container_of(c, struct config_usb_cfg, c); if (!list_empty(&cfg->string_list)) { i = 0; diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index c40f2ecbe1b8c..0edd9e53fc5a1 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -525,6 +525,33 @@ out: } EXPORT_SYMBOL_GPL(usb_gadget_wakeup); +/** + * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. + * @gadget:the device being configured for remote wakeup + * @set:value to be configured. + * + * set to one to enable remote wakeup feature and zero to disable it. + * + * returns zero on success, else negative errno. + */ +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ + int ret = 0; + + if (!gadget->ops->set_remote_wakeup) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = gadget->ops->set_remote_wakeup(gadget, set); + +out: + trace_usb_gadget_set_remote_wakeup(gadget, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); + /** * usb_gadget_set_selfpowered - sets the device selfpowered feature. * @gadget:the device being declared as self-powered diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h index abdbcb1bacb0b..a5ed26fbc2dad 100644 --- a/drivers/usb/gadget/udc/trace.h +++ b/drivers/usb/gadget/udc/trace.h @@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_ARGS(g, ret) ); +DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, + TP_PROTO(struct usb_gadget *g, int ret), + TP_ARGS(g, ret) +); + DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 43ac3fa760dbe..9783b9107d76b 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite, extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, struct usb_ep *ep0); void composite_dev_cleanup(struct usb_composite_dev *cdev); +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c); static inline struct usb_composite_driver *to_cdriver( struct usb_gadget_driver *gdrv) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dc3092cea99e9..5bec668b41dcd 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -309,6 +309,7 @@ struct usb_udc; struct usb_gadget_ops { int (*get_frame)(struct usb_gadget *); int (*wakeup)(struct usb_gadget *); + int (*set_remote_wakeup)(struct usb_gadget *, int set); int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); int (*vbus_session) (struct usb_gadget *, int is_active); int (*vbus_draw) (struct usb_gadget *, unsigned mA); @@ -383,6 +384,8 @@ struct usb_gadget_ops { * @connected: True if gadget is connected. * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag * indicates that it supports LPM as per the LPM ECN & errata. + * @wakeup_capable: True if gadget is capable of sending remote wakeup. + * @wakeup_armed: True if gadget is armed by the host for remote wakeup. * @irq: the interrupt number for device controller. * @id_number: a unique ID number for ensuring that gadget names are distinct * @@ -444,6 +447,8 @@ struct usb_gadget { unsigned deactivated:1; unsigned connected:1; unsigned lpm_capable:1; + unsigned wakeup_capable:1; + unsigned wakeup_armed:1; int irq; int id_number; }; @@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g) #if IS_ENABLED(CONFIG_USB_GADGET) int usb_gadget_frame_number(struct usb_gadget *gadget); int usb_gadget_wakeup(struct usb_gadget *gadget); +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); int usb_gadget_set_selfpowered(struct usb_gadget *gadget); int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); int usb_gadget_vbus_connect(struct usb_gadget *gadget); @@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_wakeup(struct usb_gadget *gadget) { return 0; } +static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ return 0; } static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) -- GitLab From f8faa536370ec9db460bac96460e16801f62325e Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Thu, 13 Apr 2023 23:57:42 -0700 Subject: [PATCH 0419/1418] Input: xpad - add constants for GIP interface numbers [ Upstream commit f9b2e603c6216824e34dc9a67205d98ccc9a41ca ] Wired GIP devices present multiple interfaces with the same USB identification other than the interface number. This adds constants for differentiating two of them and uses them where appropriate Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20230411031650.960322-2-vi@endrift.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 02f3bc4e4895e..13c36f51b9353 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -564,6 +564,9 @@ struct xboxone_init_packet { #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) +#define GIP_WIRED_INTF_DATA 0 +#define GIP_WIRED_INTF_AUDIO 1 + /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). @@ -2008,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id } if (xpad->xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { + intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by -- GitLab From 8745f3592ee4a7b49ede16ddd3f12a41ecaa23c9 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 31 Mar 2023 11:31:23 +0800 Subject: [PATCH 0420/1418] iommu/sprd: Release dma buffer to avoid memory leak [ Upstream commit 9afea57384d4ae7b2034593eac7fa76c7122762a ] When attaching to a domain, the driver would alloc a DMA buffer which is used to store address mapping table, and it need to be released when the IOMMU domain is freed. Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20230331033124.864691-2-zhang.lyra@gmail.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/sprd-iommu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 8261066de07d7..e4358393fe378 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) return &dom->domain; } -static void sprd_iommu_domain_free(struct iommu_domain *domain) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - - kfree(dom); -} - static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) { struct sprd_iommu_device *sdev = dom->sdev; @@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); } +static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) +{ + size_t pgt_size; + + /* Nothing need to do if the domain hasn't been attached */ + if (!dom->sdev) + return; + + pgt_size = sprd_iommu_pgt_size(&dom->domain); + dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + dom->sdev = NULL; + sprd_iommu_hw_en(dom->sdev, false); +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + sprd_iommu_cleanup(dom); + kfree(dom); +} + static int sprd_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { -- GitLab From e89c84422f35ce9fcb0fe9e3f3f60506586a7bae Mon Sep 17 00:00:00 2001 From: Tomas Krcka Date: Wed, 29 Mar 2023 12:34:19 +0000 Subject: [PATCH 0421/1418] iommu/arm-smmu-v3: Acknowledge pri/event queue overflow if any [ Upstream commit 67ea0b7ce41844eae7c10bb04dfe66a23318c224 ] When an overflow occurs in the PRI queue, the SMMU toggles the overflow flag in the PROD register. To exit the overflow condition, the PRI thread is supposed to acknowledge it by toggling this flag in the CONS register. Unacknowledged overflow causes the queue to stop adding anything new. Currently, the priq thread always writes the CONS register back to the SMMU after clearing the queue. The writeback is not necessary if the OVFLG in the PROD register has not been changed, no overflow has occured. This commit checks the difference of the overflow flag between CONS and PROD register. If it's different, toggles the OVACKFLG flag in the CONS register and write it to the SMMU. The situation is similar for the event queue. The acknowledge register is also toggled after clearing the event queue but never propagated to the hardware. This would only be done the next time when executing evtq thread. Unacknowledged event queue overflow doesn't affect the event queue, because the SMMU still adds elements to that queue when the overflow condition is active. But it feel nicer to keep SMMU in sync when possible, so use the same way here as well. Signed-off-by: Tomas Krcka Link: https://lore.kernel.org/r/20230329123420.34641-1-tomas.krcka@gmail.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8966f7d5aab61..82f100e591b5a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q) q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); } +static void queue_sync_cons_ovf(struct arm_smmu_queue *q) +{ + struct arm_smmu_ll_queue *llq = &q->llq; + + if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons))) + return; + + llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | + Q_IDX(llq, llq->cons); + queue_sync_cons_out(q); +} + static int queue_sync_prod_in(struct arm_smmu_queue *q) { u32 prod; @@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } @@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); - queue_sync_cons_out(q); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } -- GitLab From 39c6312009574ca73865354133ca222e7753a71b Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 11 Jan 2023 16:59:43 +0800 Subject: [PATCH 0422/1418] fs/ntfs3: Fix a possible null-pointer dereference in ni_clear() [ Upstream commit ec275bf9693d19cc0fdce8436f4c425ced86f6e7 ] In a previous commit c1006bd13146, ni->mi.mrec in ni_write_inode() could be NULL, and thus a NULL check is added for this variable. However, in the same call stack, ni->mi.mrec can be also dereferenced in ni_clear(): ntfs_evict_inode(inode) ni_write_inode(inode, ...) ni = ntfs_i(inode); is_rec_inuse(ni->mi.mrec) -> Add a NULL check by previous commit ni_clear(ntfs_i(inode)) is_rec_inuse(ni->mi.mrec) -> No check Thus, a possible null-pointer dereference may exist in ni_clear(). To fix it, a NULL check is added in this function. Signed-off-by: Jia-Ju Bai Reported-by: TOTE Robot Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/frecord.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index bb7e33c240737..1f0e230ec9e2c 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni) { struct rb_node *node; - if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec)) + if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec)) ni_delete_all(ni); al_destroy(ni); -- GitLab From 976126f2def45f4075f18372bc4e97bb5da3757a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2023 09:59:10 +0100 Subject: [PATCH 0423/1418] clk: tegra20: fix gcc-7 constant overflow warning [ Upstream commit b4a2adbf3586efa12fe78b9dec047423e01f3010 ] Older gcc versions get confused by comparing a u32 value to a negative constant in a switch()/case block: drivers/clk/tegra/clk-tegra20.c: In function 'tegra20_clk_measure_input_freq': drivers/clk/tegra/clk-tegra20.c:581:2: error: case label does not reduce to an integer constant case OSC_CTRL_OSC_FREQ_12MHZ: ^~~~ drivers/clk/tegra/clk-tegra20.c:593:2: error: case label does not reduce to an integer constant case OSC_CTRL_OSC_FREQ_26MHZ: Make the constants unsigned instead. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230227085914.2560984-1-arnd@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra20.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 422d782475532..dcacc5064d339 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -21,24 +21,24 @@ #define MISC_CLK_ENB 0x48 #define OSC_CTRL 0x50 -#define OSC_CTRL_OSC_FREQ_MASK (3<<30) -#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30) -#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30) -#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30) -#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30) -#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK) - -#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28) -#define OSC_CTRL_PLL_REF_DIV_1 (0<<28) -#define OSC_CTRL_PLL_REF_DIV_2 (1<<28) -#define OSC_CTRL_PLL_REF_DIV_4 (2<<28) +#define OSC_CTRL_OSC_FREQ_MASK (3u<<30) +#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30) +#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30) +#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30) +#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30) +#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK) + +#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28) +#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28) +#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28) +#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28) #define OSC_FREQ_DET 0x58 -#define OSC_FREQ_DET_TRIG (1<<31) +#define OSC_FREQ_DET_TRIG (1u<<31) #define OSC_FREQ_DET_STATUS 0x5c -#define OSC_FREQ_DET_BUSY (1<<31) -#define OSC_FREQ_DET_CNT_MASK 0xFFFF +#define OSC_FREQ_DET_BUSYu (1<<31) +#define OSC_FREQ_DET_CNT_MASK 0xFFFFu #define TEGRA20_CLK_PERIPH_BANKS 3 -- GitLab From 0d04e45c65f0785e558b93d2631d58680f263e10 Mon Sep 17 00:00:00 2001 From: Edward Lo Date: Tue, 4 Oct 2022 23:15:06 +0800 Subject: [PATCH 0424/1418] fs/ntfs3: Add length check in indx_get_root [ Upstream commit 08e8cf5f2d9ec383a2e339a2711b62a54ff3fba0 ] This adds a length check to guarantee the retrieved index root is legit. [ 162.459513] BUG: KASAN: use-after-free in hdr_find_e.isra.0+0x10c/0x320 [ 162.460176] Read of size 2 at addr ffff8880037bca99 by task mount/243 [ 162.460851] [ 162.461252] CPU: 0 PID: 243 Comm: mount Not tainted 6.0.0-rc7 #42 [ 162.461744] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 162.462609] Call Trace: [ 162.462954] [ 162.463276] dump_stack_lvl+0x49/0x63 [ 162.463822] print_report.cold+0xf5/0x689 [ 162.464608] ? unwind_get_return_address+0x3a/0x60 [ 162.465766] ? hdr_find_e.isra.0+0x10c/0x320 [ 162.466975] kasan_report+0xa7/0x130 [ 162.467506] ? _raw_spin_lock_irq+0xc0/0xf0 [ 162.467998] ? hdr_find_e.isra.0+0x10c/0x320 [ 162.468536] __asan_load2+0x68/0x90 [ 162.468923] hdr_find_e.isra.0+0x10c/0x320 [ 162.469282] ? cmp_uints+0xe0/0xe0 [ 162.469557] ? cmp_sdh+0x90/0x90 [ 162.469864] ? ni_find_attr+0x214/0x300 [ 162.470217] ? ni_load_mi+0x80/0x80 [ 162.470479] ? entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 162.470931] ? ntfs_bread_run+0x190/0x190 [ 162.471307] ? indx_get_root+0xe4/0x190 [ 162.471556] ? indx_get_root+0x140/0x190 [ 162.471833] ? indx_init+0x1e0/0x1e0 [ 162.472069] ? fnd_clear+0x115/0x140 [ 162.472363] ? _raw_spin_lock_irqsave+0x100/0x100 [ 162.472731] indx_find+0x184/0x470 [ 162.473461] ? sysvec_apic_timer_interrupt+0x57/0xc0 [ 162.474429] ? indx_find_buffer+0x2d0/0x2d0 [ 162.474704] ? do_syscall_64+0x3b/0x90 [ 162.474962] dir_search_u+0x196/0x2f0 [ 162.475381] ? ntfs_nls_to_utf16+0x450/0x450 [ 162.475661] ? ntfs_security_init+0x3d6/0x440 [ 162.475906] ? is_sd_valid+0x180/0x180 [ 162.476191] ntfs_extend_init+0x13f/0x2c0 [ 162.476496] ? ntfs_fix_post_read+0x130/0x130 [ 162.476861] ? iput.part.0+0x286/0x320 [ 162.477325] ntfs_fill_super+0x11e0/0x1b50 [ 162.477709] ? put_ntfs+0x1d0/0x1d0 [ 162.477970] ? vsprintf+0x20/0x20 [ 162.478258] ? set_blocksize+0x95/0x150 [ 162.478538] get_tree_bdev+0x232/0x370 [ 162.478789] ? put_ntfs+0x1d0/0x1d0 [ 162.479038] ntfs_fs_get_tree+0x15/0x20 [ 162.479374] vfs_get_tree+0x4c/0x130 [ 162.479729] path_mount+0x654/0xfe0 [ 162.480124] ? putname+0x80/0xa0 [ 162.480484] ? finish_automount+0x2e0/0x2e0 [ 162.480894] ? putname+0x80/0xa0 [ 162.481467] ? kmem_cache_free+0x1c4/0x440 [ 162.482280] ? putname+0x80/0xa0 [ 162.482714] do_mount+0xd6/0xf0 [ 162.483264] ? path_mount+0xfe0/0xfe0 [ 162.484782] ? __kasan_check_write+0x14/0x20 [ 162.485593] __x64_sys_mount+0xca/0x110 [ 162.486024] do_syscall_64+0x3b/0x90 [ 162.486543] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 162.487141] RIP: 0033:0x7f9d374e948a [ 162.488324] Code: 48 8b 0d 11 fa 2a 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 008 [ 162.489728] RSP: 002b:00007ffe30e73d18 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 162.490971] RAX: ffffffffffffffda RBX: 0000561cdb43a060 RCX: 00007f9d374e948a [ 162.491669] RDX: 0000561cdb43a260 RSI: 0000561cdb43a2e0 RDI: 0000561cdb442af0 [ 162.492050] RBP: 0000000000000000 R08: 0000561cdb43a280 R09: 0000000000000020 [ 162.492459] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000561cdb442af0 [ 162.493183] R13: 0000561cdb43a260 R14: 0000000000000000 R15: 00000000ffffffff [ 162.493644] [ 162.493908] [ 162.494214] The buggy address belongs to the physical page: [ 162.494761] page:000000003e38a3d5 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x37bc [ 162.496064] flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) [ 162.497278] raw: 000fffffc0000000 ffffea00000df1c8 ffffea00000df008 0000000000000000 [ 162.498928] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 [ 162.500542] page dumped because: kasan: bad access detected [ 162.501057] [ 162.501242] Memory state around the buggy address: [ 162.502230] ffff8880037bc980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.502977] ffff8880037bca00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.503522] >ffff8880037bca80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.503963] ^ [ 162.504370] ffff8880037bcb00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.504766] ffff8880037bcb80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Signed-off-by: Edward Lo Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/index.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 7371f7855e4c4..eee01db6e0cc5 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *a; const struct INDEX_NAMES *in = &s_index_names[indx->type]; + struct INDEX_ROOT *root = NULL; a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, mi); @@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, if (attr) *attr = a; - return resident_data_ex(a, sizeof(struct INDEX_ROOT)); + root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); + + /* length check */ + if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > + le32_to_cpu(a->res.data_size)) { + return NULL; + } + + return root; } static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, -- GitLab From b3152afc0eb864f7c6ecad134a15b577ef7aec77 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Oct 2022 12:32:51 +0530 Subject: [PATCH 0425/1418] fs/ntfs3: Fix NULL dereference in ni_write_inode [ Upstream commit 8dae4f6341e335a09575be60b4fdf697c732a470 ] Syzbot reports a NULL dereference in ni_write_inode. When creating a new inode, if allocation fails in mi_init function (called in mi_format_new function), mi->mrec is set to NULL. In the error path of this inode creation, mi->mrec is later dereferenced in ni_write_inode. Add a NULL check to prevent NULL dereference. Link: https://syzkaller.appspot.com/bug?extid=f45957555ed4a808cc7a Reported-and-tested-by: syzbot+f45957555ed4a808cc7a@syzkaller.appspotmail.com Signed-off-by: Abdun Nihaal Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/frecord.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 1f0e230ec9e2c..d260260900241 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) return 0; } + if (!ni->mi.mrec) + goto out; + if (is_rec_inuse(ni->mi.mrec) && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { bool modified = false; -- GitLab From 329fc4d3f73d865b25f2ee4eafafb040ace37ad5 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 17 Nov 2022 17:19:12 +0800 Subject: [PATCH 0426/1418] fs/ntfs3: Fix NULL pointer dereference in 'ni_write_inode' [ Upstream commit db2a3cc6a3481076da6344cc62a80a4e2525f36f ] Syzbot found the following issue: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000016 Mem abort info: ESR = 0x0000000096000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000010af56000 [0000000000000016] pgd=08000001090da003, p4d=08000001090da003, pud=08000001090ce003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 3036 Comm: syz-executor206 Not tainted 6.0.0-rc6-syzkaller-17739-g16c9f284e746 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : is_rec_inuse fs/ntfs3/ntfs.h:313 [inline] pc : ni_write_inode+0xac/0x798 fs/ntfs3/frecord.c:3232 lr : ni_write_inode+0xa0/0x798 fs/ntfs3/frecord.c:3226 sp : ffff8000126c3800 x29: ffff8000126c3860 x28: 0000000000000000 x27: ffff0000c8b02000 x26: ffff0000c7502320 x25: ffff0000c7502288 x24: 0000000000000000 x23: ffff80000cbec91c x22: ffff0000c8b03000 x21: ffff0000c8b02000 x20: 0000000000000001 x19: ffff0000c75024d8 x18: 00000000000000c0 x17: ffff80000dd1b198 x16: ffff80000db59158 x15: ffff0000c4b6b500 x14: 00000000000000b8 x13: 0000000000000000 x12: ffff0000c4b6b500 x11: ff80800008be1b60 x10: 0000000000000000 x9 : ffff0000c4b6b500 x8 : 0000000000000000 x7 : ffff800008be1b50 x6 : 0000000000000000 x5 : 0000000000000000 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000008 x1 : 0000000000000001 x0 : 0000000000000000 Call trace: is_rec_inuse fs/ntfs3/ntfs.h:313 [inline] ni_write_inode+0xac/0x798 fs/ntfs3/frecord.c:3232 ntfs_evict_inode+0x54/0x84 fs/ntfs3/inode.c:1744 evict+0xec/0x334 fs/inode.c:665 iput_final fs/inode.c:1748 [inline] iput+0x2c4/0x324 fs/inode.c:1774 ntfs_new_inode+0x7c/0xe0 fs/ntfs3/fsntfs.c:1660 ntfs_create_inode+0x20c/0xe78 fs/ntfs3/inode.c:1278 ntfs_create+0x54/0x74 fs/ntfs3/namei.c:100 lookup_open fs/namei.c:3413 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x804/0x11c4 fs/namei.c:3688 do_filp_open+0xdc/0x1b8 fs/namei.c:3718 do_sys_openat2+0xb8/0x22c fs/open.c:1311 do_sys_open fs/open.c:1327 [inline] __do_sys_openat fs/open.c:1343 [inline] __se_sys_openat fs/open.c:1338 [inline] __arm64_sys_openat+0xb0/0xe0 fs/open.c:1338 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 Code: 97dafee4 340001b4 f9401328 2a1f03e0 (79402d14) ---[ end trace 0000000000000000 ]--- Above issue may happens as follows: ntfs_new_inode mi_init mi->mrec = kmalloc(sbi->record_size, GFP_NOFS); -->failed to allocate memory if (!mi->mrec) return -ENOMEM; iput iput_final evict ntfs_evict_inode ni_write_inode is_rec_inuse(ni->mi.mrec)-> As 'ni->mi.mrec' is NULL trigger NULL-ptr-deref To solve above issue if new inode failed make inode bad before call 'iput()' in 'ntfs_new_inode()'. Reported-by: syzbot+f45957555ed4a808cc7a@syzkaller.appspotmail.com Signed-off-by: Ye Bin Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fsntfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 1eac80d55b554..4c2d079b3d49b 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1674,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) out: if (err) { + make_bad_inode(inode); iput(inode); ni = ERR_PTR(err); } -- GitLab From e5f488993bc1893b84d93e9915155fab66a070d2 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 27 Mar 2023 13:30:29 +0530 Subject: [PATCH 0427/1418] iommu/arm-smmu-qcom: Limit the SMR groups to 128 [ Upstream commit 12261134732689b7e30c59db9978f81230965181 ] Some platforms support more than 128 stream matching groups than what is defined by the ARM SMMU architecture specification. But due to some unknown reasons, those additional groups don't exhibit the same behavior as the architecture supported ones. For instance, the additional groups will not detect the quirky behavior of some firmware versions intercepting writes to S2CR register, thus skipping the quirk implemented in the driver and causing boot crash. So let's limit the groups to 128 for now until the issue with those groups are fixed and issue a notice to users in that case. Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20230327080029.11584-1-manivannan.sadhasivam@linaro.org [will: Reworded the comment slightly] Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index d80065c8105af..f15dcb9e4175c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -267,12 +267,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) { - unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + unsigned int last_s2cr; u32 reg; u32 smr; int i; + /* + * Some platforms support more than the Arm SMMU architected maximum of + * 128 stream matching groups. For unknown reasons, the additional + * groups don't exhibit the same behavior as the architected registers, + * so limit the groups to 128 until the behavior is fixed for the other + * groups. + */ + if (smmu->num_mapping_groups > 128) { + dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n"); + smmu->num_mapping_groups = 128; + } + + last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + /* * With some firmware versions writes to S2CR of type FAULT are * ignored, and writing BYPASS will end up written as FAULT in the -- GitLab From afbf1a5cef46427241e76704991cc83c9b1a463b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 21 Mar 2023 17:47:03 -0600 Subject: [PATCH 0428/1418] RDMA/core: Fix multiple -Warray-bounds warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aa4d540b4150052ae3b36d286b9c833a961ce291 ] GCC-13 (and Clang)[1] does not like to access a partially allocated object, since it cannot reason about it for bounds checking. In this case 140 bytes are allocated for an object of type struct ib_umad_packet: packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); However, notice that sizeof(*packet) is only 104 bytes: struct ib_umad_packet { struct ib_mad_send_buf * msg; /* 0 8 */ struct ib_mad_recv_wc * recv_wc; /* 8 8 */ struct list_head list; /* 16 16 */ int length; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct ib_user_mad mad __attribute__((__aligned__(8))); /* 40 64 */ /* size: 104, cachelines: 2, members: 5 */ /* sum members: 100, holes: 1, sum holes: 4 */ /* forced alignments: 1, forced holes: 1, sum forced holes: 4 */ /* last cacheline: 40 bytes */ } __attribute__((__aligned__(8))); and 36 bytes extra bytes are allocated for a flexible-array member in struct ib_user_mad: include/rdma/ib_mad.h: 120 enum { ... 123 IB_MGMT_RMPP_HDR = 36, ... } struct ib_user_mad { struct ib_user_mad_hdr hdr; /* 0 64 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u64 data[] __attribute__((__aligned__(8))); /* 64 0 */ /* size: 64, cachelines: 1, members: 2 */ /* forced alignments: 1 */ } __attribute__((__aligned__(8))); So we have sizeof(*packet) + IB_MGMT_RMPP_HDR == 140 bytes Then the address of the flex-array member (for which only 36 bytes were allocated) is casted and copied into a pointer to struct ib_rmpp_mad, which, in turn, is of size 256 bytes: rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; /* 0 24 */ struct ib_rmpp_hdr rmpp_hdr; /* 24 12 */ u8 data[220]; /* 36 220 */ /* size: 256, cachelines: 4, members: 3 */ }; The thing is that those 36 bytes allocated for flex-array member data in struct ib_user_mad onlly account for the size of both struct ib_mad_hdr and struct ib_rmpp_hdr, but nothing is left for array u8 data[220]. So, the compiler is legitimately complaining about accessing an object for which not enough memory was allocated. Apparently, the only members of struct ib_rmpp_mad that are relevant (that are actually being used) in function ib_umad_write() are mad_hdr and rmpp_hdr. So, instead of casting packet->mad.data to (struct ib_rmpp_mad *) create a new structure struct ib_rmpp_mad_hdr { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; } __packed; and cast packet->mad.data to (struct ib_rmpp_mad_hdr *). Notice that IB_MGMT_RMPP_HDR == sizeof(struct ib_rmpp_mad_hdr) == 36 bytes Refactor the rest of the code, accordingly. Fix the following warnings seen under GCC-13 and -Warray-bounds: drivers/infiniband/core/user_mad.c:564:50: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] drivers/infiniband/core/user_mad.c:566:42: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] drivers/infiniband/core/user_mad.c:618:25: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] drivers/infiniband/core/user_mad.c:622:44: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] Link: https://github.com/KSPP/linux/issues/273 Link: https://godbolt.org/z/oYWaGM4Yb [1] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/ZBpB91qQcB10m3Fw@work Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/user_mad.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d96c78e436f98..5c284dfbe6923 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -131,6 +131,11 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +struct ib_rmpp_mad_hdr { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; +} __packed; + #define CREATE_TRACE_POINTS #include @@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; + struct ib_rmpp_mad_hdr *rmpp_mad_hdr; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct rdma_ah_attr ah_attr; struct ib_ah *ah; - struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; u8 base_version; @@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; - packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; @@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_up; } - rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data; + hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class); - if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } else { copy_offset = IB_MGMT_MAD_HDR; @@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); - rmpp_mad->mad_hdr.tid = *tid; + rmpp_mad_hdr->mad_hdr.tid = *tid; } if (!ib_mad_kernel_rmpp_agent(agent) - && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) - && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); -- GitLab From 87632bc9ecff5ded93433bc0fca428019bdd1cfe Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 18 Jan 2024 10:05:05 -0800 Subject: [PATCH 0429/1418] mm: huge_memory: don't force huge page alignment on 32 bit commit 4ef9ad19e17676b9ef071309bc62020e2373705d upstream. commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused two issues [1] [2] reported on 32 bit system or compat userspace. It doesn't make too much sense to force huge page alignment on 32 bit system due to the constrained virtual address space. [1] https://lore.kernel.org/linux-mm/d0a136a0-4a31-46bc-adf4-2db109a61672@kernel.org/ [2] https://lore.kernel.org/linux-mm/CAJuCfpHXLdQy1a2B6xN2d7quTYwg2OoZseYPZTRpU0eHHKD-sQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240118180505.2914778-1-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: Jiri Slaby Reported-by: Suren Baghdasaryan Tested-by: Jiri Slaby Tested-by: Suren Baghdasaryan Reviewed-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Christopher Lameter Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 59577946735b1..9736e762184bd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; -- GitLab From 65a389ef979b5ca96bc08aa165d6710fe8f1e890 Mon Sep 17 00:00:00 2001 From: Han Xu Date: Wed, 8 Nov 2023 09:07:01 -0600 Subject: [PATCH 0430/1418] mtd: spinand: gigadevice: Fix the get ecc status issue [ Upstream commit 59950610c0c00c7a06d8a75d2ee5d73dba4274cf ] Some GigaDevice ecc_get_status functions use on-stack buffer for spi_mem_op causes spi_mem_check_op failing, fix the issue by using spinand scratchbuf. Fixes: c40c7a990a46 ("mtd: spinand: Add support for GigaDevice GD5F1GQ4UExxG") Signed-off-by: Han Xu Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231108150701.593912-1-han.xu@nxp.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/spi/gigadevice.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 6b043e24855fb..9116ee7f023ed 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, * report the maximum of 4 in this case */ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status & STATUS_ECC_MASK) >> 2) | ((status2 & STATUS_ECC_MASK) >> 4); @@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, * 1 ... 4 bits are flipped (and corrected) */ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status2 & STATUS_ECC_MASK) >> 4) + 1; case STATUS_ECC_UNCOR_ERROR: -- GitLab From 0b27bf4c494d61e5663baa34c3edd7ccebf0ea44 Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 21 Feb 2024 16:40:48 +0900 Subject: [PATCH 0431/1418] netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter [ Upstream commit 661779e1fcafe1b74b3f3fe8e980c1e207fea1fd ] syzbot reported the following uninit-value access issue [1]: netlink_to_full_skb() creates a new `skb` and puts the `skb->data` passed as a 1st arg of netlink_to_full_skb() onto new `skb`. The data size is specified as `len` and passed to skb_put_data(). This `len` is based on `skb->end` that is not data offset but buffer offset. The `skb->end` contains data and tailroom. Since the tailroom is not initialized when the new `skb` created, KMSAN detects uninitialized memory area when copying the data. This patch resolved this issue by correct the len from `skb->end` to `skb->len`, which is the actual data offset. BUG: KMSAN: kernel-infoleak-after-free in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak-after-free in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak-after-free in _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 copy_to_iter include/linux/uio.h:197 [inline] simple_copy_to_iter+0x68/0xa0 net/core/datagram.c:532 __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:420 skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] packet_recvmsg+0xd9c/0x2000 net/packet/af_packet.c:3482 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg net/socket.c:1066 [inline] sock_read_iter+0x467/0x580 net/socket.c:1136 call_read_iter include/linux/fs.h:2014 [inline] new_sync_read fs/read_write.c:389 [inline] vfs_read+0x8f6/0xe00 fs/read_write.c:470 ksys_read+0x20f/0x4c0 fs/read_write.c:613 __do_sys_read fs/read_write.c:623 [inline] __se_sys_read fs/read_write.c:621 [inline] __x64_sys_read+0x93/0xd0 fs/read_write.c:621 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was stored to memory at: skb_put_data include/linux/skbuff.h:2622 [inline] netlink_to_full_skb net/netlink/af_netlink.c:181 [inline] __netlink_deliver_tap_skb net/netlink/af_netlink.c:298 [inline] __netlink_deliver_tap+0x5be/0xc90 net/netlink/af_netlink.c:325 netlink_deliver_tap net/netlink/af_netlink.c:338 [inline] netlink_deliver_tap_kernel net/netlink/af_netlink.c:347 [inline] netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x10f1/0x1250 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: free_pages_prepare mm/page_alloc.c:1087 [inline] free_unref_page_prepare+0xb0/0xa40 mm/page_alloc.c:2347 free_unref_page_list+0xeb/0x1100 mm/page_alloc.c:2533 release_pages+0x23d3/0x2410 mm/swap.c:1042 free_pages_and_swap_cache+0xd9/0xf0 mm/swap_state.c:316 tlb_batch_pages_flush mm/mmu_gather.c:98 [inline] tlb_flush_mmu_free mm/mmu_gather.c:293 [inline] tlb_flush_mmu+0x6f5/0x980 mm/mmu_gather.c:300 tlb_finish_mmu+0x101/0x260 mm/mmu_gather.c:392 exit_mmap+0x49e/0xd30 mm/mmap.c:3321 __mmput+0x13f/0x530 kernel/fork.c:1349 mmput+0x8a/0xa0 kernel/fork.c:1371 exit_mm+0x1b8/0x360 kernel/exit.c:567 do_exit+0xd57/0x4080 kernel/exit.c:858 do_group_exit+0x2fd/0x390 kernel/exit.c:1021 __do_sys_exit_group kernel/exit.c:1032 [inline] __se_sys_exit_group kernel/exit.c:1030 [inline] __x64_sys_exit_group+0x3c/0x50 kernel/exit.c:1030 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Bytes 3852-3903 of 3904 are uninitialized Memory access of size 3904 starts at ffff88812ea1e000 Data copied to user address 0000000020003280 CPU: 1 PID: 5043 Comm: syz-executor297 Not tainted 6.7.0-rc5-syzkaller-00047-g5bd7ef53ffe5 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 Fixes: 1853c9496460 ("netlink, mmap: transform mmap skb into full skb on taps") Reported-and-tested-by: syzbot+34ad5fab48f7bf510349@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=34ad5fab48f7bf510349 [1] Signed-off-by: Ryosuke Yasuoka Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240221074053.1794118-1-ryasuoka@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6857a4965fe87..e9b81cba1e2b4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); -- GitLab From 0ac219c4c3ab253f3981f346903458d20bacab32 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Feb 2024 18:27:33 +0100 Subject: [PATCH 0432/1418] netlink: add nla be16/32 types to minlen array [ Upstream commit 9a0d18853c280f6a0ee99f91619f2442a17a323a ] BUG: KMSAN: uninit-value in nla_validate_range_unsigned lib/nlattr.c:222 [inline] BUG: KMSAN: uninit-value in nla_validate_int_range lib/nlattr.c:336 [inline] BUG: KMSAN: uninit-value in validate_nla lib/nlattr.c:575 [inline] BUG: KMSAN: uninit-value in __nla_validate_parse+0x2e20/0x45c0 lib/nlattr.c:631 nla_validate_range_unsigned lib/nlattr.c:222 [inline] nla_validate_int_range lib/nlattr.c:336 [inline] validate_nla lib/nlattr.c:575 [inline] ... The message in question matches this policy: [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), but because NLA_BE32 size in minlen array is 0, the validation code will read past the malformed (too small) attribute. Note: Other attributes, e.g. BITFIELD32, SINT, UINT.. are also missing: those likely should be added too. Reported-by: syzbot+3f497b07aa3baf2fb4d0@syzkaller.appspotmail.com Reported-by: xingwei lee Closes: https://lore.kernel.org/all/CABOYnLzFYHSnvTyS6zGa-udNX55+izqkOt2sB9WDqUcEGW6n8w@mail.gmail.com/raw Fixes: ecaf75ffd5f5 ("netlink: introduce bigendian integer types") Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20240221172740.5092-1-fw@strlen.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- lib/nlattr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/nlattr.c b/lib/nlattr.c index dffd60e4065fd..86344df0ccf7b 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { @@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; /* -- GitLab From ab63de24ebea36fe73ac7121738595d704b66d96 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Feb 2024 14:56:02 +0100 Subject: [PATCH 0433/1418] net: ip_tunnel: prevent perpetual headroom growth [ Upstream commit 5ae1e9922bbdbaeb9cfbe91085ab75927488ac0f ] syzkaller triggered following kasan splat: BUG: KASAN: use-after-free in __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170 Read of size 1 at addr ffff88812fb4000e by task syz-executor183/5191 [..] kasan_report+0xda/0x110 mm/kasan/report.c:588 __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170 skb_flow_dissect_flow_keys include/linux/skbuff.h:1514 [inline] ___skb_get_hash net/core/flow_dissector.c:1791 [inline] __skb_get_hash+0xc7/0x540 net/core/flow_dissector.c:1856 skb_get_hash include/linux/skbuff.h:1556 [inline] ip_tunnel_xmit+0x1855/0x33c0 net/ipv4/ip_tunnel.c:748 ipip_tunnel_xmit+0x3cc/0x4e0 net/ipv4/ipip.c:308 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564 __dev_queue_xmit+0x7c1/0x3d60 net/core/dev.c:4349 dev_queue_xmit include/linux/netdevice.h:3134 [inline] neigh_connected_output+0x42c/0x5d0 net/core/neighbour.c:1592 ... ip_finish_output2+0x833/0x2550 net/ipv4/ip_output.c:235 ip_finish_output+0x31/0x310 net/ipv4/ip_output.c:323 .. iptunnel_xmit+0x5b4/0x9b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1dbc/0x33c0 net/ipv4/ip_tunnel.c:831 ipgre_xmit+0x4a1/0x980 net/ipv4/ip_gre.c:665 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564 ... The splat occurs because skb->data points past skb->head allocated area. This is because neigh layer does: __skb_pull(skb, skb_network_offset(skb)); ... but skb_network_offset() returns a negative offset and __skb_pull() arg is unsigned. IOW, we skb->data gets "adjusted" by a huge value. The negative value is returned because skb->head and skb->data distance is more than 64k and skb->network_header (u16) has wrapped around. The bug is in the ip_tunnel infrastructure, which can cause dev->needed_headroom to increment ad infinitum. The syzkaller reproducer consists of packets getting routed via a gre tunnel, and route of gre encapsulated packets pointing at another (ipip) tunnel. The ipip encapsulation finds gre0 as next output device. This results in the following pattern: 1). First packet is to be sent out via gre0. Route lookup found an output device, ipip0. 2). ip_tunnel_xmit for gre0 bumps gre0->needed_headroom based on the future output device, rt.dev->needed_headroom (ipip0). 3). ip output / start_xmit moves skb on to ipip0. which runs the same code path again (xmit recursion). 4). Routing step for the post-gre0-encap packet finds gre0 as output device to use for ipip0 encapsulated packet. tunl0->needed_headroom is then incremented based on the (already bumped) gre0 device headroom. This repeats for every future packet: gre0->needed_headroom gets inflated because previous packets' ipip0 step incremented rt->dev (gre0) headroom, and ipip0 incremented because gre0 needed_headroom was increased. For each subsequent packet, gre/ipip0->needed_headroom grows until post-expand-head reallocations result in a skb->head/data distance of more than 64k. Once that happens, skb->network_header (u16) wraps around when pskb_expand_head tries to make sure that skb_network_offset() is unchanged after the headroom expansion/reallocation. After this skb_network_offset(skb) returns a different (and negative) result post headroom expansion. The next trip to neigh layer (or anything else that would __skb_pull the network header) makes skb->data point to a memory location outside skb->head area. v2: Cap the needed_headroom update to an arbitarily chosen upperlimit to prevent perpetual increase instead of dropping the headroom increment completely. Reported-and-tested-by: syzbot+bfde3bef047a81b8fde6@syzkaller.appspotmail.com Closes: https://groups.google.com/g/syzkaller-bugs/c/fL9G6GtWskY/m/VKk_PR5FBAAJ Fixes: 243aad830e8a ("ip_gre: include route header_len in max_headroom calculation") Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220135606.4939-1-fw@strlen.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/ip_tunnel.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 24961b304dad0..328f9068c6a43 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + static const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { @@ -614,13 +628,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); - - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, headroom)) { ip_rt_put(rt); goto tx_dropped; } + + ip_tunnel_adj_headroom(dev, headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; @@ -800,16 +814,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, max_headroom)) { ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; } + ip_tunnel_adj_headroom(dev, max_headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; -- GitLab From a3c8fa54e904b0ddb52a08cc2d8ac239054f61fd Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 20 Feb 2024 16:10:53 +0800 Subject: [PATCH 0434/1418] net: mctp: take ownership of skb in mctp_local_output [ Upstream commit 3773d65ae5154ed7df404b050fd7387a36ab5ef3 ] Currently, mctp_local_output only takes ownership of skb on success, and we may leak an skb if mctp_local_output fails in specific states; the skb ownership isn't transferred until the actual output routing occurs. Instead, make mctp_local_output free the skb on all error paths up to the route action, so it always consumes the passed skb. Fixes: 833ef3b91de6 ("mctp: Populate socket implementation") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220081053.1439104-1-jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/mctp.h | 1 + net/mctp/route.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/mctp.h b/include/net/mctp.h index 82800d521c3de..7ed84054f4623 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -249,6 +249,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/net/mctp/route.c b/net/mctp/route.c index 256bf0b89e6ca..0144d8ebdaefb 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); if (!dev) { rcu_read_unlock(); - return rc; + goto out_free; } rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rt->mtu = 0; } else { - return -EINVAL; + rc = -EINVAL; + goto out_free; } spin_lock_irqsave(&rt->dev->addrs_lock, flags); @@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + /* route output functions consume the skb, even on error */ + skb = NULL; + out_release: if (!ext_rt) mctp_route_release(rt); mctp_dev_put(tmp_rt.dev); +out_free: + kfree_skb(skb); return rc; } -- GitLab From 29360fd3288f3978ccde2f8f7eba22282c4a08a3 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 20 Feb 2024 11:12:07 +0800 Subject: [PATCH 0435/1418] tun: Fix xdp_rxq_info's queue_index when detaching [ Upstream commit 2a770cdc4382b457ca3d43d03f0f0064f905a0d0 ] When a queue(tfile) is detached, we only update tfile's queue_index, but do not update xdp_rxq_info's queue_index. This patch fixes it. Fixes: 8bf5c4ee1889 ("tun: setup xdp_rxq_info") Signed-off-by: Yunjian Wang Link: https://lore.kernel.org/r/1708398727-46308-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 367255bb44cdc..922d6f16d99d1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); -- GitLab From e85b3c15398f6fa1f3941be8acbef79ae114744d Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sat, 17 Feb 2024 13:30:10 -0800 Subject: [PATCH 0436/1418] cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back [ Upstream commit f0a0fc10abb062d122db5ac4ed42f6d1ca342649 ] There is a loophole in pstate limit clamping for the intel_cpufreq CPU frequency scaling driver (intel_pstate in passive mode), schedutil CPU frequency scaling governor, HWP (HardWare Pstate) control enabled, when the adjust_perf call back path is used. Fix it. Fixes: a365ab6b9dfb cpufreq: intel_pstate: Implement the ->adjust_perf() callback Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index abdd26f7d04c9..5771f3fc6115d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2952,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; -- GitLab From 7985d73961bbb4e726c1be7b9cd26becc7be8325 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Feb 2024 15:12:10 -0800 Subject: [PATCH 0437/1418] net: veth: clear GRO when clearing XDP even when down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fe9f801355f0b47668419f30f1fac1cf4539e736 ] veth sets NETIF_F_GRO automatically when XDP is enabled, because both features use the same NAPI machinery. The logic to clear NETIF_F_GRO sits in veth_disable_xdp() which is called both on ndo_stop and when XDP is turned off. To avoid the flag from being cleared when the device is brought down, the clearing is skipped when IFF_UP is not set. Bringing the device down should indeed not modify its features. Unfortunately, this means that clearing is also skipped when XDP is disabled _while_ the device is down. And there's nothing on the open path to bring the device features back into sync. IOW if user enables XDP, disables it and then brings the device up we'll end up with a stray GRO flag set but no NAPI instances. We don't depend on the GRO flag on the datapath, so the datapath won't crash. We will crash (or hang), however, next time features are sync'ed (either by user via ethtool or peer changing its config). The GRO flag will go away, and veth will try to disable the NAPIs. But the open path never created them since XDP was off, the GRO flag was a stray. If NAPI was initialized before we'll hang in napi_disable(). If it never was we'll crash trying to stop uninitialized hrtimer. Move the GRO flag updates to the XDP enable / disable paths, instead of mixing them with the ndo_open / ndo_close paths. Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Reported-by: Thomas Gleixner Reported-by: syzbot+039399a9b96297ddedca@syzkaller.appspotmail.com Signed-off-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/veth.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 36c5a41f84e44..dea9cc8c39f7a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1558,6 +1541,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding + * XDP is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1568,6 +1559,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; -- GitLab From 1b0998fdd85776775d975d0024bca227597e836a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 12:17:47 +0000 Subject: [PATCH 0438/1418] ipv6: fix potential "struct net" leak in inet6_rtm_getaddr() [ Upstream commit 10bfd453da64a057bcfd1a49fb6b271c48653cdb ] It seems that if userspace provides a correct IFA_TARGET_NETNSID value but no IFA_ADDRESS and IFA_LOCAL attributes, inet6_rtm_getaddr() returns -EINVAL with an elevated "struct net" refcount. Fixes: 6ecf4c37eb3e ("ipv6: enable IFA_TARGET_NETNSID for RTM_GETADDR") Signed-off-by: Eric Dumazet Cc: Christian Brauner Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 46527b5cc8f0c..1648373692a99 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5473,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) - return -EINVAL; - + if (!addr) { + err = -EINVAL; + goto errout; + } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); -- GitLab From c41548fede3d4b0305be2237ba7dbf657e9ff30b Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Feb 2024 13:38:38 +0100 Subject: [PATCH 0439/1418] lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected [ Upstream commit 0e67899abfbfdea0c3c0ed3fd263ffc601c5c157 ] Same as LAN7800, LAN7850 can be used without EEPROM. If EEPROM is not present or not flashed, LAN7850 will fail to sync the speed detected by the PHY with the MAC. In case link speed is 100Mbit, it will accidentally work, otherwise no data can be transferred. Better way would be to implement link_up callback, or set auto speed configuration unconditionally. But this changes would be more intrusive. So, for now, set it only if no EEPROM is found. Fixes: e69647a19c87 ("lan78xx: Set ASD in MAC_CR when EEE is enabled.") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240222123839.2816561-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c458c030fadf6..7b9d480e44fe4 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3035,7 +3035,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ -- GitLab From 548ab66730848c8ed105e1d7caf9f4e3f68cdc94 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 23 Feb 2024 15:59:08 -0800 Subject: [PATCH 0440/1418] veth: try harder when allocating queue memory [ Upstream commit 1ce7d306ea63f3e379557c79abd88052e0483813 ] struct veth_rq is pretty large, 832B total without debug options enabled. Since commit under Fixes we try to pre-allocate enough queues for every possible CPU. Miao Wang reports that this may lead to order-5 allocations which will fail in production. Let the allocation fallback to vmalloc() and try harder. These are the same flags we pass to netdev queue allocation. Reported-and-tested-by: Miao Wang Fixes: 9d3684c24a52 ("veth: create by default nr_possible_cpus queues") Link: https://lore.kernel.org/all/5F52CAE2-2FB7-4712-95F1-3312FBBFA8DD@gmail.com/ Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240223235908.693010-1-kuba@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/veth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index dea9cc8c39f7a..dd9f5f1461921 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1359,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1375,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) -- GitLab From d77ab053fb2f97ff366118d4dbffd8fe48168541 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 25 Feb 2024 00:20:06 +0100 Subject: [PATCH 0441/1418] net: usb: dm9601: fix wrong return value in dm9601_mdio_read [ Upstream commit c68b2c9eba38ec3f60f4894b189090febf4d8d22 ] The MII code does not check the return value of mdio_read (among others), and therefore no error code should be sent. A previous fix to the use of an uninitialized variable propagates negative error codes, that might lead to wrong operations by the MII library. An example of such issues is the use of mii_nway_restart by the dm9601 driver. The mii_nway_restart function does not check the value returned by mdio_read, which in this case might be a negative number which could contain the exact bit the function checks (BMCR_ANENABLE = 0x1000). Return zero in case of error, as it is common practice in users of mdio_read to avoid wrong uses of the return value. Fixes: 8f8abb863fa5 ("net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read") Signed-off-by: Javier Carrasco Reviewed-by: Simon Horman Reviewed-by: Peter Korsgaard Link: https://lore.kernel.org/r/20240225-dm9601_ret_err-v1-1-02c1d959ea59@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/dm9601.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a972db..8b6d6a1b3c2ec 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, -- GitLab From 1b4223e807fa17bc53062e922e4e7266450e304f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 26 Feb 2024 12:08:20 +0100 Subject: [PATCH 0442/1418] net: lan78xx: fix "softirq work is pending" error [ Upstream commit e3d5d70cb483df8296dd44e9ae3b6355ef86494c ] Disable BH around the call to napi_schedule() to avoid following error: NOHZ tick-stop error: local softirq work is pending, handler #08!!! Fixes: ec4c7e12396b ("lan78xx: Introduce NAPI polling support") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240226110820.2113584-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 7b9d480e44fe4..4fd4563811299 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; -- GitLab From aa5897232682c27ff731b083b40c879b0eb2c994 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 26 Feb 2024 13:49:21 +0100 Subject: [PATCH 0443/1418] uapi: in6: replace temporary label with rfc9486 [ Upstream commit 6a2008641920a9c6fe1abbeb9acbec463215d505 ] Not really a fix per se, but IPV6_TLV_IOAM is still tagged as "TEMPORARY IANA allocation for IOAM", while RFC 9486 is available for some time now. Just update the reference. Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Signed-off-by: Justin Iurman Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240226124921.9097-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/uapi/linux/in6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index c4c53a9ab9595..ff8d21f9e95b7 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,7 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_IOAM 49 /* RFC 9486 */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ -- GitLab From 17ccd9798fe0beda3db212cfa3ebe373f605cbd6 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 26 Feb 2024 17:42:32 +0100 Subject: [PATCH 0444/1418] stmmac: Clear variable when destroying workqueue [ Upstream commit 8af411bbba1f457c33734795f024d0ef26d0963f ] Currently when suspending driver and stopping workqueue it is checked whether workqueue is not NULL and if so, it is destroyed. Function destroy_workqueue() does drain queue and does clear variable, but it does not set workqueue variable to NULL. This can cause kernel/module panic if code attempts to clear workqueue that was not initialized. This scenario is possible when resuming suspended driver in stmmac_resume(), because there is no handling for failed stmmac_hw_setup(), which can fail and return if DMA engine has failed to initialize, and workqueue is initialized after DMA engine. Should DMA engine fail to initialize, resume will proceed normally, but interface won't work and TX queue will eventually timeout, causing 'Reset adapter' error. This then does destroy workqueue during reset process. And since workqueue is initialized after DMA engine and can be skipped, it will cause kernel/module panic. To secure against this possible crash, set workqueue variable to NULL when destroying workqueue. Log/backtrace from crash goes as follows: [88.031977]------------[ cut here ]------------ [88.031985]NETDEV WATCHDOG: eth0 (sxgmac): transmit queue 1 timed out [88.032017]WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:477 dev_watchdog+0x390/0x398 [88.032251]---[ end trace e70de432e4d5c2c0 ]--- [88.032282]sxgmac 16d88000.ethernet eth0: Reset adapter. [88.036359]------------[ cut here ]------------ [88.036519]Call trace: [88.036523] flush_workqueue+0x3e4/0x430 [88.036528] drain_workqueue+0xc4/0x160 [88.036533] destroy_workqueue+0x40/0x270 [88.036537] stmmac_fpe_stop_wq+0x4c/0x70 [88.036541] stmmac_release+0x278/0x280 [88.036546] __dev_close_many+0xcc/0x158 [88.036551] dev_close_many+0xbc/0x190 [88.036555] dev_close.part.0+0x70/0xc0 [88.036560] dev_close+0x24/0x30 [88.036564] stmmac_service_task+0x110/0x140 [88.036569] process_one_work+0x1d8/0x4a0 [88.036573] worker_thread+0x54/0x408 [88.036578] kthread+0x164/0x170 [88.036583] ret_from_fork+0x10/0x20 [88.036588]---[ end trace e70de432e4d5c2c1 ]--- [88.036597]Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 Fixes: 5a5586112b929 ("net: stmmac: support FPE link partner hand-shaking procedure") Signed-off-by: Jakub Raczynski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 91b2aa81914ba..e2d51014ab4bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3900,8 +3900,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) { set_bit(__FPE_REMOVING, &priv->fpe_task_state); - if (priv->fpe_wq) + if (priv->fpe_wq) { destroy_workqueue(priv->fpe_wq); + priv->fpe_wq = NULL; + } netdev_info(priv->dev, "FPE workqueue stop"); } -- GitLab From cad078914b628737fa0946de02169e80fba721cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Tue, 2 Jan 2024 19:08:08 +0100 Subject: [PATCH 0445/1418] Bluetooth: hci_sync: Check the correct flag before starting a scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6b3899be24b16ff8ee0cb25f0bd59b01b15ba1d1 ] There's a very confusing mistake in the code starting a HCI inquiry: We're calling hci_dev_test_flag() to test for HCI_INQUIRY, but hci_dev_test_flag() checks hdev->dev_flags instead of hdev->flags. HCI_INQUIRY is a bit that's set on hdev->flags, not on hdev->dev_flags though. HCI_INQUIRY equals the integer 7, and in hdev->dev_flags, 7 means HCI_BONDABLE, so we were actually checking for HCI_BONDABLE here. The mistake is only present in the synchronous code for starting an inquiry, not in the async one. Also devices are typically bondable while doing an inquiry, so that might be the reason why nobody noticed it so far. Fixes: abfeea476c68 ("Bluetooth: hci_sync: Convert MGMT_OP_START_DISCOVERY") Signed-off-by: Jonas Dreßler Reviewed-by: Simon Horman Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 45d19294aa772..13ed6cbfade3e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5482,7 +5482,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) bt_dev_dbg(hdev, ""); - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) + if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); -- GitLab From 45085686b9559bfbe3a4f41d3d695a520668f5e1 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Thu, 4 Jan 2024 11:56:32 +0000 Subject: [PATCH 0446/1418] Bluetooth: Avoid potential use-after-free in hci_error_reset [ Upstream commit 2449007d3f73b2842c9734f45f0aadb522daf592 ] While handling the HCI_EV_HARDWARE_ERROR event, if the underlying BT controller is not responding, the GPIO reset mechanism would free the hci_dev and lead to a use-after-free in hci_error_reset. Here's the call trace observed on a ChromeOS device with Intel AX201: queue_work_on+0x3e/0x6c __hci_cmd_sync_sk+0x2ee/0x4c0 [bluetooth ] ? init_wait_entry+0x31/0x31 __hci_cmd_sync+0x16/0x20 [bluetooth ] hci_error_reset+0x4f/0xa4 [bluetooth ] process_one_work+0x1d8/0x33f worker_thread+0x21b/0x373 kthread+0x13a/0x152 ? pr_cont_work+0x54/0x54 ? kthread_blkcg+0x31/0x31 ret_from_fork+0x1f/0x30 This patch holds the reference count on the hci_dev while processing a HCI_EV_HARDWARE_ERROR event to avoid potential crash. Fixes: c7741d16a57c ("Bluetooth: Perform a power cycle when receiving hardware error event") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6a1db678d032f..a8932d449eb63 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) -- GitLab From 926405765f25809602c52e037827d0d5a9f62692 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Jan 2024 10:43:26 -0500 Subject: [PATCH 0447/1418] Bluetooth: hci_sync: Fix accept_list when attempting to suspend [ Upstream commit e5469adb2a7e930d96813316592302d9f8f1df4e ] During suspend, only wakeable devices can be in acceptlist, so if the device was previously added it needs to be removed otherwise the device can end up waking up the system prematurely. Fixes: 3b42055388c3 ("Bluetooth: hci_sync: Fix attempting to suspend with unfiltered passive scan") Signed-off-by: Clancy Shang Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Paul Menzel Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 13ed6cbfade3e..a337340464567 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2251,8 +2251,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) + !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { + hci_le_del_accept_list_sync(hdev, ¶ms->addr, + params->addr_type); return 0; + } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) -- GitLab From 0b056a52b3adfe5fedf20cd64addbe4e1d226c95 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 9 Jan 2024 19:03:23 +0800 Subject: [PATCH 0448/1418] Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR [ Upstream commit 61a5ab72edea7ebc3ad2c6beea29d966f528ebfb ] hci_store_wake_reason() wrongly parses event HCI_Connection_Request as HCI_Connection_Complete and HCI_Connection_Complete as HCI_Connection_Request, so causes recording wakeup BD_ADDR error and potential stability issue, fix it by using the correct field. Fixes: 2f20216c1d6f ("Bluetooth: Emit controller suspend and resume events") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 56ecc5f97b916..b18f5e5df8ad0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7245,10 +7245,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, * keep track of the bdaddr of the connection event that woke us up. */ if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + bacpy(&hdev->wake_addr, &conn_request->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); + bacpy(&hdev->wake_addr, &conn_complete->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_LE_META) { struct hci_ev_le_meta *le_ev = (void *)skb->data; -- GitLab From 30a5e812f78e3d1cced90e1ed750bf027599205f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 22 Jan 2024 09:02:47 -0500 Subject: [PATCH 0449/1418] Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST [ Upstream commit 7e74aa53a68bf60f6019bd5d9a9a1406ec4d4865 ] If we received HCI_EV_IO_CAPA_REQUEST while HCI_OP_READ_REMOTE_EXT_FEATURES is yet to be responded assume the remote does support SSP since otherwise this event shouldn't be generated. Link: https://lore.kernel.org/linux-bluetooth/CABBYNZ+9UdG1cMZVmdtN3U2aS16AKMCyTARZZyFX7xTEDWcMOw@mail.gmail.com/T/#t Fixes: c7f59461f5a7 ("Bluetooth: Fix a refcnt underflow problem for hci_conn") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b18f5e5df8ad0..f79aaef5a276d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5282,9 +5282,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn || !hci_conn_ssp_enabled(conn)) + if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) goto unlock; + /* Assume remote supports SSP since it has triggered this event */ + set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + hci_conn_hold(conn); if (!hci_dev_test_flag(hdev, HCI_MGMT)) -- GitLab From 2dc94c160ef0292d7da7ea2d4c3087c852c97fcc Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 25 Jan 2024 14:50:28 +0800 Subject: [PATCH 0450/1418] Bluetooth: Enforce validation on max value of connection interval [ Upstream commit e4b019515f950b4e6e5b74b2e1bb03a90cb33039 ] Right now Linux BT stack cannot pass test case "GAP/CONN/CPUP/BV-05-C 'Connection Parameter Update Procedure Invalid Parameters Central Responder'" in Bluetooth Test Suite revision GAP.TS.p44. [0] That was revoled by commit c49a8682fc5d ("Bluetooth: validate BLE connection interval updates"), but later got reverted due to devices like keyboards and mice may require low connection interval. So only validate the max value connection interval to pass the Test Suite, and let devices to request low connection interval if needed. [0] https://www.bluetooth.org/docman/handlers/DownloadDoc.ashx?doc_id=229869 Fixes: 68d19d7d9957 ("Revert "Bluetooth: validate BLE connection interval updates"") Signed-off-by: Kai-Heng Feng Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 4 ++++ net/bluetooth/l2cap_core.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f79aaef5a276d..452d839c152fc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6719,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); + if (max > hcon->le_conn_max_interval) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 81f5974e5eb5a..b4cba55be5ad9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = hci_check_conn_params(min, max, latency, to_multiplier); + if (max > hcon->le_conn_max_interval) { + BT_DBG("requested connection interval exceeds current bounds."); + err = -EINVAL; + } else { + err = hci_check_conn_params(min, max, latency, to_multiplier); + } + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else -- GitLab From 7b410226d9eff7f64857a75d65e149440bff2b2f Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 19 Jan 2024 17:45:30 +0800 Subject: [PATCH 0451/1418] Bluetooth: qca: Fix wrong event type for patch config command [ Upstream commit c0dbc56077ae759f2dd602c7561480bc2b1b712c ] Vendor-specific command patch config has HCI_Command_Complete event as response, but qca_send_patch_config_cmd() wrongly expects vendor-specific event for the command, fixed by using right event type. Btmon log for the vendor-specific command are shown below: < HCI Command: Vendor (0x3f|0x0000) plen 5 28 01 00 00 00 > HCI Event: Command Complete (0x0e) plen 5 Vendor (0x3f|0x0000) ncmd 1 Status: Success (0x00) 28 Fixes: 4fac8a7ac80b ("Bluetooth: btqca: sequential validation") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c9064d34d8308..d7d0c9de3dc31 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); -- GitLab From eb7b5777d3c7f5dbbb0736f638068f50006d81b0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 12:13:53 +0100 Subject: [PATCH 0452/1418] Bluetooth: hci_qca: mark OF related data as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 44fac8a2fd2f72ee98ee41e6bc9ecc7765b5d3cc ] The driver can be compile tested with !CONFIG_OF making certain data unused: drivers/bluetooth/hci_qca.c:1869:37: error: ‘qca_soc_data_wcn6750’ defined but not used [-Werror=unused-const-variable=] drivers/bluetooth/hci_qca.c:1853:37: error: ‘qca_soc_data_wcn3998’ defined but not used [-Werror=unused-const-variable=] drivers/bluetooth/hci_qca.c:1841:37: error: ‘qca_soc_data_wcn3991’ defined but not used [-Werror=unused-const-variable=] drivers/bluetooth/hci_qca.c:1830:37: error: ‘qca_soc_data_wcn3990’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 76ceb8a0183d1..0e908a337e534 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1824,7 +1824,7 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; -static const struct qca_device_data qca_soc_data_wcn3990 = { +static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1835,7 +1835,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_wcn3991 = { +static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { .soc_type = QCA_WCN3991, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1847,7 +1847,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn3998 = { +static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .soc_type = QCA_WCN3998, .vregs = (struct qca_vreg []) { { "vddio", 10000 }, @@ -1858,13 +1858,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_qca6390 = { +static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { .soc_type = QCA_QCA6390, .num_vregs = 0, .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn6750 = { +static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .soc_type = QCA_WCN6750, .vregs = (struct qca_vreg []) { { "vddio", 5000 }, -- GitLab From e5383662fd02ad9516ae2c27f85cd56296372ba9 Mon Sep 17 00:00:00 2001 From: Steev Klimaszewski Date: Sun, 26 Mar 2023 18:38:10 -0500 Subject: [PATCH 0453/1418] Bluetooth: hci_qca: Add support for QTI Bluetooth chip wcn6855 [ Upstream commit 095327fede005f4b14d40b2183b2f7965c739dbd ] Add regulators, GPIOs and changes required to power on/off wcn6855. Add support for firmware download for wcn6855 which is in the linux-firmware repository as hpbtfw21.tlv and hpnv21.bin. Based on the assumption that this is similar to the wcn6750 Tested-on: BTFW.HSP.2.1.0-00538-VER_PATCHZ-1 Signed-off-by: Steev Klimaszewski Reviewed-by: Bjorn Andersson Tested-by: Bjorn Andersson Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 14 ++++++++- drivers/bluetooth/btqca.h | 10 +++++++ drivers/bluetooth/hci_qca.c | 57 ++++++++++++++++++++++++++++--------- 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d7d0c9de3dc31..4cb541096b934 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -614,6 +614,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); + } else if (soc_type == QCA_WCN6855) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpbtfw%02x.tlv", rom_ver); } else { snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); @@ -648,6 +651,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, else if (soc_type == QCA_WCN6750) snprintf(config.fwname, sizeof(config.fwname), "qca/msnv%02x.bin", rom_ver); + else if (soc_type == QCA_WCN6855) + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); else snprintf(config.fwname, sizeof(config.fwname), "qca/nvm_%08x.bin", soc_ver); @@ -685,11 +691,17 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_WCN6750: + case QCA_WCN6855: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) return err; + break; + default: + break; } bt_dev_info(hdev, "QCA setup on UART is completed"); diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 61e9a50e66ae1..b884095bcd9d0 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -147,6 +147,7 @@ enum qca_btsoc_type { QCA_WCN3991, QCA_QCA6390, QCA_WCN6750, + QCA_WCN6855, }; #if IS_ENABLED(CONFIG_BT_QCA) @@ -168,6 +169,10 @@ static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) { return soc_type == QCA_WCN6750; } +static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) +{ + return soc_type == QCA_WCN6855; +} #else @@ -206,6 +211,11 @@ static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) return false; } +static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) +{ + return false; +} + static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0e908a337e534..f217c2821b9fb 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1315,7 +1315,8 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) /* Give the controller time to process the request */ if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) + qca_is_wcn6750(qca_soc_type(hu)) || + qca_is_wcn6855(qca_soc_type(hu))) usleep_range(1000, 10000); else msleep(300); @@ -1392,7 +1393,8 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) { + qca_is_wcn6750(qca_soc_type(hu)) || + qca_is_wcn6855(qca_soc_type(hu))) { if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1426,7 +1428,8 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) * changing the baudrate of chip and host. */ if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) hci_uart_set_flow_control(hu, true); if (soc_type == QCA_WCN3990) { @@ -1444,7 +1447,8 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) error: if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) hci_uart_set_flow_control(hu, false); if (soc_type == QCA_WCN3990) { @@ -1680,7 +1684,8 @@ static int qca_power_on(struct hci_dev *hdev) return 0; if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) { ret = qca_regulator_init(hu); } else { qcadev = serdev_device_get_drvdata(hu->serdev); @@ -1721,7 +1726,8 @@ static int qca_setup(struct hci_uart *hu) bt_dev_info(hdev, "setting up %s", qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390"); + (soc_type == QCA_WCN6750) ? "wcn6750" : + (soc_type == QCA_WCN6855) ? "wcn6855" : "ROME/QCA6390"); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1733,7 +1739,8 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) { set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -1755,7 +1762,8 @@ retry: } if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type))) { + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type))) { /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1881,6 +1889,20 @@ static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; +static const struct qca_device_data qca_soc_data_wcn6855 = { + .soc_type = QCA_WCN6855, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddbtcxmx", 126000 }, + { "vddrfacmn", 12500 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p7", 302000 }, + { "vddrfa1p2", 257000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -1910,7 +1932,7 @@ static void qca_power_shutdown(struct hci_uart *hu) host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750) { + } else if (soc_type == QCA_WCN6750 || soc_type == QCA_WCN6855) { gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -2045,7 +2067,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (data && (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type))) { + qca_is_wcn6750(data->soc_type) || + qca_is_wcn6855(data->soc_type))) { qcadev->btsoc_type = data->soc_type; qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), @@ -2065,14 +2088,18 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) { + if (IS_ERR_OR_NULL(qcadev->bt_en) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); power_ctrl_enabled = false; } qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750) + if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2148,8 +2175,9 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_power *power = qcadev->bt_power; if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) && - power->vregs_on) + qca_is_wcn6750(qcadev->btsoc_type) || + qca_is_wcn6855(qcadev->btsoc_type)) && + power->vregs_on) qca_power_shutdown(&qcadev->serdev_hu); else if (qcadev->susclk) clk_disable_unprepare(qcadev->susclk); @@ -2333,6 +2361,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, + { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -- GitLab From 29059d0f3bc21f76db5a375a70a449ba86f3d6cc Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 19 May 2023 18:43:23 +0800 Subject: [PATCH 0454/1418] Bluetooth: btqca: use le32_to_cpu for ver.soc_id [ Upstream commit 8153b738bc547878a017889d2b1cf8dd2de0e0c6 ] Use le32_to_cpu for ver.soc_id to fix the following sparse warning. drivers/bluetooth/btqca.c:640:24: sparse: warning: restricted __le32 degrades to integer Signed-off-by: Min-Hua Chen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4cb541096b934..d40a6041c48cd 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -637,7 +637,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); else if (qca_is_wcn399x(soc_type)) { - if (ver.soc_id == QCA_WCN3991_SOC_ID) { + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), "qca/crnv%02xu.bin", rom_ver); } else { -- GitLab From 940963613275a39fd693fb1969c1c6fbc0798a21 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 2 Aug 2023 08:56:29 +0200 Subject: [PATCH 0455/1418] Bluetooth: btqca: Add WCN3988 support [ Upstream commit f904feefe60c28b6852d5625adc4a2c39426a2d9 ] Add support for the Bluetooth chip codenamed APACHE which is part of WCN3988. The firmware for this chip has a slightly different naming scheme compared to most others. For ROM Version 0x0200 we need to use apbtfw10.tlv + apnv10.bin and for ROM version 0x201 apbtfw11.tlv + apnv11.bin Signed-off-by: Luca Weiss Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 13 +++++++++++-- drivers/bluetooth/btqca.h | 12 ++++++++++-- drivers/bluetooth/hci_qca.c | 12 ++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d40a6041c48cd..d775402b33df3 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -594,14 +594,20 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Firmware files to download are based on ROM version. * ROM version is derived from last two bytes of soc_ver. */ - rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); + if (soc_type == QCA_WCN3988) + rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); + else + rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); if (soc_type == QCA_WCN6750) qca_send_patch_config_cmd(hdev); /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (qca_is_wcn399x(soc_type)) { + if (soc_type == QCA_WCN3988) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/apbtfw%02x.tlv", rom_ver); + } else if (qca_is_wcn399x(soc_type)) { snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); } else if (soc_type == QCA_QCA6390) { @@ -636,6 +642,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, if (firmware_name) snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); + else if (soc_type == QCA_WCN3988) + snprintf(config.fwname, sizeof(config.fwname), + "qca/apnv%02x.bin", rom_ver); else if (qca_is_wcn399x(soc_type)) { if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index b884095bcd9d0..fc6cf314eb0ef 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -142,6 +142,7 @@ enum qca_btsoc_type { QCA_INVALID = -1, QCA_AR3002, QCA_ROME, + QCA_WCN3988, QCA_WCN3990, QCA_WCN3998, QCA_WCN3991, @@ -162,8 +163,15 @@ int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { - return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 || - soc_type == QCA_WCN3998; + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + return true; + default: + return false; + } } static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) { diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index f217c2821b9fb..746eb096c037c 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1832,6 +1832,17 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; +static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { + .soc_type = QCA_WCN3988, + .vregs = (struct qca_vreg []) { + { "vddio", 15000 }, + { "vddxo", 80000 }, + { "vddrf", 300000 }, + { "vddch0", 450000 }, + }, + .num_vregs = 4, +}; + static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { @@ -2357,6 +2368,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,qca6174-bt" }, { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, { .compatible = "qcom,qca9377-bt" }, + { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988}, { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990}, { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, -- GitLab From fc47ed389a884ee4a5b01f62ecae8137b41f63d7 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 16 Aug 2023 10:06:47 +0200 Subject: [PATCH 0456/1418] Bluetooth: qca: use switch case for soc type behavior [ Upstream commit 691d54d0f7cb14baac1ff4af210d13c0e4897e27 ] Use switch/case to handle soc type specific behaviour, the permit dropping the qca_is_xxx() inline functions and make the code clearer and easier to update for new SoCs. Suggested-by: Konrad Dybcio Suggested-by: Luiz Augusto von Dentz Signed-off-by: Neil Armstrong Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 87 +++++++++----- drivers/bluetooth/btqca.h | 36 ------ drivers/bluetooth/hci_qca.c | 233 +++++++++++++++++++++++++++--------- 3 files changed, 236 insertions(+), 120 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d775402b33df3..8331090af86ea 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -604,26 +604,34 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (soc_type == QCA_WCN3988) { - snprintf(config.fwname, sizeof(config.fwname), - "qca/apbtfw%02x.tlv", rom_ver); - } else if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_QCA6390) { + break; + case QCA_WCN3988: + snprintf(config.fwname, sizeof(config.fwname), + "qca/apbtfw%02x.tlv", rom_ver); + break; + case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), "qca/htbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_WCN6750) { + break; + case QCA_WCN6750: /* Choose mbn file by default.If mbn file is not found * then choose tlv file */ config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); - } else if (soc_type == QCA_WCN6855) { + break; + case QCA_WCN6855: snprintf(config.fwname, sizeof(config.fwname), "qca/hpbtfw%02x.tlv", rom_ver); - } else { + break; + default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); } @@ -639,33 +647,44 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download NVM configuration */ config.type = TLV_TYPE_NVM; - if (firmware_name) + if (firmware_name) { snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); - else if (soc_type == QCA_WCN3988) - snprintf(config.fwname, sizeof(config.fwname), - "qca/apnv%02x.bin", rom_ver); - else if (qca_is_wcn399x(soc_type)) { - if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + } else { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02xu.bin", rom_ver); + } else { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02x.bin", rom_ver); + } + break; + case QCA_WCN3988: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02xu.bin", rom_ver); - } else { + "qca/apnv%02x.bin", rom_ver); + break; + case QCA_QCA6390: + snprintf(config.fwname, sizeof(config.fwname), + "qca/htnv%02x.bin", rom_ver); + break; + case QCA_WCN6750: + snprintf(config.fwname, sizeof(config.fwname), + "qca/msnv%02x.bin", rom_ver); + break; + case QCA_WCN6855: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); + break; + + default: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02x.bin", rom_ver); + "qca/nvm_%08x.bin", soc_ver); } } - else if (soc_type == QCA_QCA6390) - snprintf(config.fwname, sizeof(config.fwname), - "qca/htnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6750) - snprintf(config.fwname, sizeof(config.fwname), - "qca/msnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6855) - snprintf(config.fwname, sizeof(config.fwname), - "qca/hpnv%02x.bin", rom_ver); - else - snprintf(config.fwname, sizeof(config.fwname), - "qca/nvm_%08x.bin", soc_ver); err = qca_download_firmware(hdev, &config, soc_type, rom_ver); if (err < 0) { @@ -673,16 +692,24 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type >= QCA_WCN3991) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_QCA6390: + case QCA_WCN6750: + case QCA_WCN6855: err = qca_disable_soc_logging(hdev); if (err < 0) return err; + break; + default: + break; } /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the * VsMsftOpCode. */ switch (soc_type) { + case QCA_WCN3988: case QCA_WCN3990: case QCA_WCN3991: case QCA_WCN3998: diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index fc6cf314eb0ef..fe51c632d7720 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -161,27 +161,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - return true; - default: - return false; - } -} -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6750; -} -static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6855; -} - #else static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) @@ -209,21 +188,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - return false; -} - -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return false; -} - -static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) -{ - return false; -} - static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 746eb096c037c..e6ead996948a8 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -606,9 +606,18 @@ static int qca_open(struct hci_uart *hu) if (hu->serdev) { qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: hu->init_speed = qcadev->init_speed; + break; + + default: + break; + } if (qcadev->oper_speed) hu->oper_speed = qcadev->oper_speed; @@ -1314,12 +1323,19 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); /* Give the controller time to process the request */ - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu)) || - qca_is_wcn6855(qca_soc_type(hu))) + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: usleep_range(1000, 10000); - else + break; + + default: msleep(300); + } return 0; } @@ -1392,13 +1408,19 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu)) || - qca_is_wcn6855(qca_soc_type(hu))) { + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; - } else { + break; + + default: if (!qca_get_speed(hu, QCA_INIT_SPEED) || !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1427,14 +1449,28 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) /* Disable flow control for wcn3990 to deassert RTS while * changing the baudrate of chip and host. */ - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: hci_uart_set_flow_control(hu, true); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: reinit_completion(&qca->drop_ev_comp); set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } qca_baudrate = qca_get_baudrate_value(speed); @@ -1446,12 +1482,22 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) host_set_baudrate(hu, speed); error: - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: hci_uart_set_flow_control(hu, false); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: /* Wait for the controller to send the vendor event * for the baudrate change command. */ @@ -1463,6 +1509,10 @@ error: } clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } } @@ -1624,12 +1674,20 @@ static int qca_regulator_init(struct hci_uart *hu) } } - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: /* Forcefully enable wcn399x to enter in to boot mode. */ host_set_baudrate(hu, 2400); ret = qca_send_power_pulse(hu, false); if (ret) return ret; + break; + + default: + break; } /* For wcn6750 need to enable gpio bt_en */ @@ -1646,10 +1704,18 @@ static int qca_regulator_init(struct hci_uart *hu) qca_set_speed(hu, QCA_INIT_SPEED); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: ret = qca_send_power_pulse(hu, true); if (ret) return ret; + break; + + default: + break; } /* Now the device is in ready state to communicate with host. @@ -1683,11 +1749,17 @@ static int qca_power_on(struct hci_dev *hdev) if (!hu->serdev) return 0; - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: ret = qca_regulator_init(hu); - } else { + break; + + default: qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 1); @@ -1710,6 +1782,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + const char *soc_name; ret = qca_check_speeds(hu); if (ret) @@ -1724,10 +1797,26 @@ static int qca_setup(struct hci_uart *hu) */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - bt_dev_info(hdev, "setting up %s", - qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : - (soc_type == QCA_WCN6855) ? "wcn6855" : "ROME/QCA6390"); + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + soc_name = "wcn399x"; + break; + + case QCA_WCN6750: + soc_name = "wcn6750"; + break; + + case QCA_WCN6855: + soc_name = "wcn6855"; + break; + + default: + soc_name = "ROME/QCA6390"; + } + bt_dev_info(hdev, "setting up %s", soc_name); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1738,16 +1827,22 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) goto out; - } else { + break; + + default: qca_set_speed(hu, QCA_INIT_SPEED); } @@ -1761,9 +1856,16 @@ retry: qca_baudrate = qca_get_baudrate_value(speed); } - if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type))) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + break; + + default: /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1939,11 +2041,18 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750 || soc_type == QCA_WCN6855) { + break; + + case QCA_WCN6750: + case QCA_WCN6855: gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -1951,7 +2060,9 @@ static void qca_power_shutdown(struct hci_uart *hu) sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); } - } else if (qcadev->bt_en) { + break; + + default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2076,11 +2187,18 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); - if (data && - (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type) || - qca_is_wcn6855(data->soc_type))) { + if (data) qcadev->btsoc_type = data->soc_type; + else + qcadev->btsoc_type = QCA_ROME; + + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2124,12 +2242,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) BT_ERR("wcn3990 serdev registration failed"); return err; } - } else { - if (data) - qcadev->btsoc_type = data->soc_type; - else - qcadev->btsoc_type = QCA_ROME; + break; + default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR_OR_NULL(qcadev->bt_en)) { @@ -2185,13 +2300,23 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct qca_power *power = qcadev->bt_power; - if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type) || - qca_is_wcn6855(qcadev->btsoc_type)) && - power->vregs_on) - qca_power_shutdown(&qcadev->serdev_hu); - else if (qcadev->susclk) - clk_disable_unprepare(qcadev->susclk); + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + if (power->vregs_on) { + qca_power_shutdown(&qcadev->serdev_hu); + break; + } + fallthrough; + + default: + if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); + } hci_uart_unregister_device(&qcadev->serdev_hu); } -- GitLab From 67ffc334b92a96e65b627b6b3349c25946ff69f6 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 16 Aug 2023 10:06:48 +0200 Subject: [PATCH 0457/1418] Bluetooth: qca: add support for WCN7850 [ Upstream commit e0c1278ac89b0390fe9a74f673b6f25172292db2 ] Add support for the WCN7850 Bluetooth chipset. Tested on the SM8550 QRD platform. Signed-off-by: Neil Armstrong Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 10 ++++++++++ drivers/bluetooth/btqca.h | 1 + drivers/bluetooth/hci_qca.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8331090af86ea..0211f704a358b 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -631,6 +631,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/hpbtfw%02x.tlv", rom_ver); break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtbtfw%02x.tlv", rom_ver); + break; default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); @@ -679,6 +683,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/hpnv%02x.bin", rom_ver); break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtnv%02x.bin", rom_ver); + break; default: snprintf(config.fwname, sizeof(config.fwname), @@ -697,6 +705,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, case QCA_QCA6390: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: err = qca_disable_soc_logging(hdev); if (err < 0) return err; @@ -731,6 +740,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, case QCA_WCN3991: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index fe51c632d7720..03bff5c0059de 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -149,6 +149,7 @@ enum qca_btsoc_type { QCA_QCA6390, QCA_WCN6750, QCA_WCN6855, + QCA_WCN7850, }; #if IS_ENABLED(CONFIG_BT_QCA) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e6ead996948a8..43abdaf92a0ed 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1330,6 +1330,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: usleep_range(1000, 10000); break; @@ -1415,6 +1416,7 @@ static int qca_check_speeds(struct hci_uart *hu) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1456,6 +1458,7 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, true); break; @@ -1489,6 +1492,7 @@ error: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, false); break; @@ -1756,6 +1760,7 @@ static int qca_power_on(struct hci_dev *hdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: ret = qca_regulator_init(hu); break; @@ -1813,6 +1818,10 @@ static int qca_setup(struct hci_uart *hu) soc_name = "wcn6855"; break; + case QCA_WCN7850: + soc_name = "wcn7850"; + break; + default: soc_name = "ROME/QCA6390"; } @@ -1834,6 +1843,7 @@ retry: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -1863,6 +1873,7 @@ retry: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: break; default: @@ -2016,6 +2027,20 @@ static const struct qca_device_data qca_soc_data_wcn6855 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; +static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { + .soc_type = QCA_WCN7850, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddaon", 26000 }, + { "vdddig", 126000 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p2", 257000 }, + { "vddrfa1p9", 302000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -2199,6 +2224,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2228,7 +2254,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) GPIOD_IN); if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) + data->soc_type == QCA_WCN6855 || + data->soc_type == QCA_WCN7850)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2307,6 +2334,7 @@ static void qca_serdev_remove(struct serdev_device *serdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: if (power->vregs_on) { qca_power_shutdown(&qcadev->serdev_hu); break; @@ -2499,6 +2527,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, + { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -- GitLab From 92b8a3273f3812ba441d2a842d602ff7d33362b3 Mon Sep 17 00:00:00 2001 From: Janaki Ramaiah Thota Date: Wed, 24 Jan 2024 20:00:42 +0530 Subject: [PATCH 0458/1418] Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT [ Upstream commit 7dcd3e014aa7faeeaf4047190b22d8a19a0db696 ] BT adapter going into UNCONFIGURED state during BT turn ON when devicetree has no local-bd-address node. Bluetooth will not work out of the box on such devices, to avoid this problem, added check to set HCI_QUIRK_USE_BDADDR_PROPERTY based on local-bd-address node entry. When this quirk is not set, the public Bluetooth address read by host from controller though HCI Read BD Address command is considered as valid. Fixes: e668eb1e1578 ("Bluetooth: hci_core: Don't stop BT if the BD address missing in dts") Signed-off-by: Janaki Ramaiah Thota Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 43abdaf92a0ed..8bfef7f81b417 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1844,7 +1845,17 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); -- GitLab From ddf6ee3df30b694ac0a66b243245e5b89b6162e2 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Thu, 22 Feb 2024 10:33:08 +0000 Subject: [PATCH 0459/1418] netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate() [ Upstream commit 7e0f122c65912740327e4c54472acaa5f85868cb ] Commit d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") added some validation of NFPROTO_* families in the nft_compat module, but it broke the ability to use legacy iptables modules in dual-stack nftables. While with legacy iptables one had to independently manage IPv4 and IPv6 tables, with nftables it is possible to have dual-stack tables sharing the rules. Moreover, it was possible to use rules based on legacy iptables match/target modules in dual-stack nftables. As an example, the program from [2] creates an INET dual-stack family table using an xt_bpf based rule, which looks like the following (the actual output was generated with a patched nft tool as the current nft tool does not parse dual stack tables with legacy match rules, so consider it for illustrative purposes only): table inet testfw { chain input { type filter hook prerouting priority filter; policy accept; bytecode counter packets 0 bytes 0 accept } } After d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") we get EOPNOTSUPP for the above program. Fix this by allowing NFPROTO_INET for nft_(match/target)_validate(), but also restrict the functions to classic iptables hooks. Changes in v3: * clarify that upstream nft will not display such configuration properly and that the output was generated with a patched nft tool * remove example program from commit description and link to it instead * no code changes otherwise Changes in v2: * restrict nft_(match/target)_validate() to classic iptables hooks * rewrite example program to use unmodified libnftnl Fixes: d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") Link: https://lore.kernel.org/all/Zc1PfoWN38UuFJRI@calendula/T/#mc947262582c90fec044c7a3398cc92fac7afea72 [1] Link: https://lore.kernel.org/all/20240220145509.53357-1-ignat@cloudflare.com/ [2] Reported-by: Jordan Griege Signed-off-by: Ignat Korchagin Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_compat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index e1623fbf36548..e4b8c02c5e6ae 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); -- GitLab From b8afc22a1160121d108d7ea8496f133804d69b93 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Feb 2023 14:45:22 +0100 Subject: [PATCH 0460/1418] netfilter: let reset rules clean out conntrack entries [ Upstream commit 2954fe60e33da0f4de4d81a4c95c7dddb517d00c ] iptables/nftables support responding to tcp packets with tcp resets. The generated tcp reset packet passes through both output and postrouting netfilter hooks, but conntrack will never see them because the generated skb has its ->nfct pointer copied over from the packet that triggered the reset rule. If the reset rule is used for established connections, this may result in the conntrack entry to be around for a very long time (default timeout is 5 days). One way to avoid this would be to not copy the nf_conn pointer so that the rest packet passes through conntrack too. Problem is that output rules might not have the same conntrack zone setup as the prerouting ones, so its possible that the reset skb won't find the correct entry. Generating a template entry for the skb seems error prone as well. Add an explicit "closing" function that switches a confirmed conntrack entry to closed state and wire this up for tcp. If the entry isn't confirmed, no action is needed because the conntrack entry will never be committed to the table. Reported-by: Russel King Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") Signed-off-by: Sasha Levin --- include/linux/netfilter.h | 3 +++ include/net/netfilter/nf_conntrack.h | 8 ++++++ net/ipv4/netfilter/nf_reject_ipv4.c | 1 + net/ipv6/netfilter/nf_reject_ipv6.c | 1 + net/netfilter/core.c | 16 ++++++++++++ net/netfilter/nf_conntrack_core.c | 12 +++++++++ net/netfilter/nf_conntrack_proto_tcp.c | 35 ++++++++++++++++++++++++++ 7 files changed, 76 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bef8db9d6c085..c8e03bcaecaaa 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #include void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) @@ -459,6 +461,7 @@ struct nf_ct_hook { bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + void (*set_closing)(struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa4644..3dbf947285be2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -125,6 +125,12 @@ struct nf_conn { union nf_conntrack_proto proto; }; +static inline struct nf_conn * +nf_ct_to_nf_conn(const struct nf_conntrack *nfct) +{ + return container_of(nfct, struct nf_conn, ct_general); +} + static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { @@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) void nf_ct_destroy(struct nf_conntrack *nfct); +void nf_conntrack_tcp_set_closing(struct nf_conn *ct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 4073762996e22..fc761915c5f6f 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, goto free_nskb; nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip_local_out for bridged traffic, the MAC source on diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 433d98bbe33f7..71d692728230e 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip6_local_out for bridged traffic, the MAC source on diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 55a7f72d547cd..edf92074221e2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); +void nf_ct_set_closing(struct nf_conntrack *nfct) +{ + const struct nf_ct_hook *ct_hook; + + if (!nfct) + return; + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) + ct_hook->set_closing(nfct); + + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_ct_set_closing); + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7960262966094..6d30c64a5fe86 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2772,11 +2772,23 @@ err_cachep: return ret; } +static void nf_conntrack_set_closing(struct nf_conntrack *nfct) +{ + struct nf_conn *ct = nf_ct_to_nf_conn(nfct); + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + nf_conntrack_tcp_set_closing(ct); + break; + } +} + static const struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, + .set_closing = nf_conntrack_set_closing, }; void nf_conntrack_init_end(void) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e0092bf273fd0..9480e638e5d15 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -913,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) return false; } +void nf_conntrack_tcp_set_closing(struct nf_conn *ct) +{ + enum tcp_conntrack old_state; + const unsigned int *timeouts; + u32 timeout; + + if (!nf_ct_is_confirmed(ct)) + return; + + spin_lock_bh(&ct->lock); + old_state = ct->proto.tcp.state; + ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; + + if (old_state == TCP_CONNTRACK_CLOSE || + test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + spin_unlock_bh(&ct->lock); + return; + } + + timeouts = nf_ct_timeout_lookup(ct); + if (!timeouts) { + const struct nf_tcp_net *tn; + + tn = nf_tcp_pernet(nf_ct_net(ct)); + timeouts = tn->timeouts; + } + + timeout = timeouts[TCP_CONNTRACK_CLOSE]; + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); + + spin_unlock_bh(&ct->lock); + + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); +} + static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) { state->td_end = 0; -- GitLab From 2b1414d5e94e477edff1d2c79030f1d742625ea0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2024 16:17:51 +0100 Subject: [PATCH 0461/1418] netfilter: bridge: confirm multicast packets before passing them up the stack [ Upstream commit 62e7151ae3eb465e0ab52a20c941ff33bb6332e9 ] conntrack nf_confirm logic cannot handle cloned skbs referencing the same nf_conn entry, which will happen for multicast (broadcast) frames on bridges. Example: macvlan0 | br0 / \ ethX ethY ethX (or Y) receives a L2 multicast or broadcast packet containing an IP packet, flow is not yet in conntrack table. 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. -> skb->_nfct now references a unconfirmed entry 2. skb is broad/mcast packet. bridge now passes clones out on each bridge interface. 3. skb gets passed up the stack. 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb and schedules a work queue to send them out on the lower devices. The clone skb->_nfct is not a copy, it is the same entry as the original skb. The macvlan rx handler then returns RX_HANDLER_PASS. 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. The Macvlan broadcast worker and normal confirm path will race. This race will not happen if step 2 already confirmed a clone. In that case later steps perform skb_clone() with skb->_nfct already confirmed (in hash table). This works fine. But such confirmation won't happen when eb/ip/nftables rules dropped the packets before they reached the nf_confirm step in postrouting. Pablo points out that nf_conntrack_bridge doesn't allow use of stateful nat, so we can safely discard the nf_conn entry and let inet call conntrack again. This doesn't work for bridge netfilter: skb could have a nat transformation. Also bridge nf prevents re-invocation of inet prerouting via 'sabotage_in' hook. Work around this problem by explicit confirmation of the entry at LOCAL_IN time, before upper layer has a chance to clone the unconfirmed entry. The downside is that this disables NAT and conntrack helpers. Alternative fix would be to add locking to all code parts that deal with unconfirmed packets, but even if that could be done in a sane way this opens up other problems, for example: -m physdev --physdev-out eth0 -j SNAT --snat-to 1.2.3.4 -m physdev --physdev-out eth1 -j SNAT --snat-to 1.2.3.5 For multicast case, only one of such conflicting mappings will be created, conntrack only handles 1:1 NAT mappings. Users should set create a setup that explicitly marks such traffic NOTRACK (conntrack bypass) to avoid this, but we cannot auto-bypass them, ruleset might have accept rules for untracked traffic already, so user-visible behaviour would change. Suggested-by: Pablo Neira Ayuso Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217777 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter.h | 1 + net/bridge/br_netfilter_hooks.c | 96 ++++++++++++++++++++++ net/bridge/netfilter/nf_conntrack_bridge.c | 30 +++++++ net/netfilter/nf_conntrack_core.c | 1 + 4 files changed, 128 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c8e03bcaecaaa..e5f4b6f8d1c09 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -462,6 +462,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 202ad43e35d6b..bff48d5763635 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -43,6 +43,10 @@ #include #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + static unsigned int brnf_net_id __read_mostly; struct brnf_net { @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_STOLEN; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +/* conntracks' nf_confirm logic cannot handle cloned skbs referencing + * the same nf_conn entry, which will happen for multicast (broadcast) + * Frames on bridges. + * + * Example: + * macvlan0 + * br0 + * ethX ethY + * + * ethX (or Y) receives multicast or broadcast packet containing + * an IP packet, not yet in conntrack table. + * + * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + * -> skb->_nfct now references a unconfirmed entry + * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + * interface. + * 3. skb gets passed up the stack. + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + * and schedules a work queue to send them out on the lower devices. + * + * The clone skb->_nfct is not a copy, it is the same entry as the + * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. + * + * The Macvlan broadcast worker and normal confirm path will race. + * + * This race will not happen if step 2 already confirmed a clone. In that + * case later steps perform skb_clone() with skb->_nfct already confirmed (in + * hash table). This works fine. + * + * But such confirmation won't happen when eb/ip/nftables rules dropped the + * packets before they reached the nf_confirm step in postrouting. + * + * Work around this problem by explicit confirmation of the entry at + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed + * entry. + * + */ +static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + ct = container_of(nfct, struct nf_conn, ct_general); + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. + */ + ct_hook = rcu_dereference(nf_ct_hook); + if (!ct_hook) { + skb->_nfct = 0ul; + nf_conntrack_put(nfct); + return NF_ACCEPT; + } + + nf_bridge_pull_encap_header(skb); + ret = ct_hook->confirm(skb); + switch (ret & NF_VERDICT_MASK) { + case NF_STOLEN: + return NF_STOLEN; + default: + nf_bridge_push_encap_header(skb); + break; + } + + ct = container_of(nfct, struct nf_conn, ct_general); + WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); + + return ret; +} +#endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + { + .hook = br_nf_local_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_LAST, + }, +#endif { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 06d94b2c6b5de..c7c27ada67044 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, return nf_conntrack_in(skb, &bridge_state); } +static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ + skb->_nfct = 0; + nf_ct_put(ct); + + return NF_ACCEPT; +} + static void nf_ct_bridge_frag_save(struct sk_buff *skb, struct nf_bridge_frag_data *data) { @@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, + { + .hook = nf_ct_bridge_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, { .hook = nf_ct_bridge_post, .pf = NFPROTO_BRIDGE, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6d30c64a5fe86..024f93fc8c0bb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2789,6 +2789,7 @@ static const struct nf_ct_hook nf_conntrack_hook = { .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, .set_closing = nf_conntrack_set_closing, + .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) -- GitLab From f2261eb994aa5757c1da046b78e3229a3ece0ad9 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 27 Feb 2024 20:11:28 +0800 Subject: [PATCH 0462/1418] rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back [ Upstream commit 743ad091fb46e622f1b690385bb15e3cd3daf874 ] In the commit d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length"), an adjustment was made to the old loop logic in the function `rtnl_bridge_setlink` to enable the loop to also check the length of the IFLA_BRIDGE_MODE attribute. However, this adjustment removed the `break` statement and led to an error logic of the flags writing back at the end of this function. if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); // attr should point to IFLA_BRIDGE_FLAGS NLA !!! Before the mentioned commit, the `attr` is granted to be IFLA_BRIDGE_FLAGS. However, this is not necessarily true fow now as the updated loop will let the attr point to the last NLA, even an invalid NLA which could cause overflow writes. This patch introduces a new variable `br_flag` to save the NLA pointer that points to IFLA_BRIDGE_FLAGS and uses it to resolve the mentioned error logic. Fixes: d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length") Signed-off-by: Lin Ma Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240227121128.608110-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7cf1e42d7f93b..ac379e4590f8d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); } @@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } -- GitLab From a0222b48175709b8a66e5f373d17a10ca5659cc8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 27 Feb 2024 10:49:41 -0800 Subject: [PATCH 0463/1418] igb: extend PTP timestamp adjustments to i211 [ Upstream commit 0bb7b09392eb74b152719ae87b1ba5e4bf910ef0 ] The i211 requires the same PTP timestamp adjustments as the i210, according to its datasheet. To ensure consistent timestamping across different platforms, this change extends the existing adjustments to include the i211. The adjustment result are tested and comparable for i210 and i211 based systems. Fixes: 3f544d2a4d5c ("igb: adjust PTP timestamps for Tx/Rx latency") Signed-off-by: Oleksij Rempel Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240227184942.362710-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 07171e574e7d7..36e62197fba0b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; -- GitLab From 7d4121b40149aed0698c7b82384c5c069da91836 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 28 Feb 2024 09:56:44 +0100 Subject: [PATCH 0464/1418] net: hsr: Use correct offset for HSR TLV values in supervisory HSR frames [ Upstream commit 51dd4ee0372228ffb0f7709fa7aa0678d4199d06 ] Current HSR implementation uses following supervisory frame (even for HSRv1 the HSR tag is not is not present): 00000000: 01 15 4e 00 01 2d XX YY ZZ 94 77 10 88 fb 00 01 00000010: 7e 1c 17 06 XX YY ZZ 94 77 10 1e 06 XX YY ZZ 94 00000020: 77 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 The current code adds extra two bytes (i.e. sizeof(struct hsr_sup_tlv)) when offset for skb_pull() is calculated. This is wrong, as both 'struct hsrv1_ethhdr_sp' and 'hsrv0_ethhdr_sp' already have 'struct hsr_sup_tag' defined in them, so there is no need for adding extra two bytes. This code was working correctly as with no RedBox support, the check for HSR_TLV_EOT (0x00) was off by two bytes, which were corresponding to zeroed padded bytes for minimal packet size. Fixes: eafaa88b3eb7 ("net: hsr: Add support for redbox supervision frames") Signed-off-by: Lukasz Majewski Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228085644.3618044-1-lukma@denx.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/hsr/hsr_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 80cdc6f6b34c9..0323ab5023c69 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return false; /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; + total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); -- GitLab From 40f0f326cfe6847faaa409f4883b94fcdda468ab Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 28 Feb 2024 23:43:57 +0100 Subject: [PATCH 0465/1418] tls: decrement decrypt_pending if no async completion will be called [ Upstream commit f7fa16d49837f947ee59492958f9e6f0e51d9a78 ] With mixed sync/async decryption, or failures of crypto_aead_decrypt, we increment decrypt_pending but we never do the corresponding decrement since tls_decrypt_done will not be called. In this case, we should decrement decrypt_pending immediately to avoid getting stuck. For example, the prequeue prequeue test gets stuck with mixed modes (one async decrypt + one sync decrypt). Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/c56d5fc35543891d5319f834f25622360e1bfbec.1709132643.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 93e1bfa72d791..c6ad435a44218 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk, return 0; ret = crypto_wait_req(ret, &ctx->async_wait); + } else if (darg->async) { + atomic_dec(&ctx->decrypt_pending); } darg->async = false; -- GitLab From 08562ca971ff6d4d30ef7eb3fe932f8bf9dcd841 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 28 Feb 2024 23:43:58 +0100 Subject: [PATCH 0466/1418] tls: fix peeking with sync+async decryption [ Upstream commit 6caaf104423d809b49a67ee6500191d063b40dc6 ] If we peek from 2 records with a currently empty rx_list, and the first record is decrypted synchronously but the second record is decrypted async, the following happens: 1. decrypt record 1 (sync) 2. copy from record 1 to the userspace's msg 3. queue the decrypted record to rx_list for future read(!PEEK) 4. decrypt record 2 (async) 5. queue record 2 to rx_list 6. call process_rx_list to copy data from the 2nd record We currently pass copied=0 as skip offset to process_rx_list, so we end up copying once again from the first record. We should skip over the data we've already copied. Seen with selftest tls.12_aes_gcm.recv_peek_large_buf_mult_recs Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/1b132d2b2b99296bfde54e8a67672d90d6d16e71.1709132643.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c6ad435a44218..2bd27b77769cb 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2023,6 +2023,7 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); @@ -2170,8 +2171,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2210,8 +2213,8 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek, NULL); + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); -- GitLab From ddc547dd05a46720866c32022300f7376c40119f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:24:40 +0100 Subject: [PATCH 0467/1418] efi/capsule-loader: fix incorrect allocation size [ Upstream commit fccfa646ef3628097d59f7d9c1a3e84d4b6bb45e ] gcc-14 notices that the allocation with sizeof(void) on 32-bit architectures is not enough for a 64-bit phys_addr_t: drivers/firmware/efi/capsule-loader.c: In function 'efi_capsule_open': drivers/firmware/efi/capsule-loader.c:295:24: error: allocation of insufficient size '4' for type 'phys_addr_t' {aka 'long long unsigned int'} with size '8' [-Werror=alloc-size] 295 | cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); | ^ Use the correct type instead here. Fixes: f24c4d478013 ("efi/capsule-loader: Reinstate virtual capsule mapping") Signed-off-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/capsule-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a8140..97bafb5f70389 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); -- GitLab From cefe18e9ec84f8fe3e198ccebb815cc996eb9797 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Feb 2024 16:51:33 +0100 Subject: [PATCH 0468/1418] power: supply: bq27xxx-i2c: Do not free non existing IRQ [ Upstream commit 2df70149e73e79783bcbc7db4fa51ecef0e2022c ] The bq27xxx i2c-client may not have an IRQ, in which case client->irq will be 0. bq27xxx_battery_i2c_probe() already has an if (client->irq) check wrapping the request_threaded_irq(). But bq27xxx_battery_i2c_remove() unconditionally calls free_irq(client->irq) leading to: [ 190.310742] ------------[ cut here ]------------ [ 190.310843] Trying to free already-free IRQ 0 [ 190.310861] WARNING: CPU: 2 PID: 1304 at kernel/irq/manage.c:1893 free_irq+0x1b8/0x310 Followed by a backtrace when unbinding the driver. Add an if (client->irq) to bq27xxx_battery_i2c_remove() mirroring probe() to fix this. Fixes: 444ff00734f3 ("power: supply: bq27xxx: Fix I2C IRQ race on remove") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240215155133.70537-1-hdegoede@redhat.com Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 0713a52a25107..17b37354e32c0 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); -- GitLab From 7f8644b6a86d45c9f8240734b161896a09069fe5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 21 Feb 2024 10:21:56 +0100 Subject: [PATCH 0469/1418] ALSA: Drop leftover snd-rtctimer stuff from Makefile [ Upstream commit 4df49712eb54141be00a9312547436d55677f092 ] We forgot to remove the line for snd-rtctimer from Makefile while dropping the functionality. Get rid of the stale line. Fixes: 34ce71a96dcb ("ALSA: timer: remove legacy rtctimer") Link: https://lore.kernel.org/r/20240221092156.28695-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/Makefile b/sound/core/Makefile index 2762f03d9b7bc..a7a1590b29526 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o snd-rawmidi-objs := rawmidi.o snd-timer-objs := timer.o snd-hrtimer-objs := hrtimer.o -snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o snd-seq-device-objs := seq_device.o -- GitLab From d36b9a1b4e5214abaf864afde5617b021b5cb588 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 23 Feb 2024 16:03:33 +0100 Subject: [PATCH 0470/1418] drm/tegra: Remove existing framebuffer only if we support display [ Upstream commit 86bf8cfda6d2a6720fa2e6e676c98f0882c9d3d7 ] Tegra DRM doesn't support display on Tegra234 and later, so make sure not to remove any existing framebuffers in that case. v2: - add comments explaining how this situation can come about - clear DRIVER_MODESET and DRIVER_ATOMIC feature bits Fixes: 6848c291a54f ("drm/aperture: Convert drivers to aperture interfaces") Signed-off-by: Thierry Reding Reviewed-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240223150333.1401582-1-thierry.reding@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/drm.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 5fc55b9777cbf..6806779f8ecce 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = tegra_drm_fb_init(drm); if (err < 0) -- GitLab From 2f91a96b892fab2f2543b4a55740c5bee36b1a6b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Feb 2024 12:44:11 +0100 Subject: [PATCH 0471/1418] fbcon: always restore the old font data in fbcon_do_set_font() [ Upstream commit 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f ] Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when vc_resize() failed) started restoring old font data upon failure (of vc_resize()). But it performs so only for user fonts. It means that the "system"/internal fonts are not restored at all. So in result, the very first call to fbcon_do_set_font() performs no restore at all upon failing vc_resize(). This can be reproduced by Syzkaller to crash the system on the next invocation of font_get(). It's rather hard to hit the allocation failure in vc_resize() on the first font_set(), but not impossible. Esp. if fault injection is used to aid the execution/failure. It was demonstrated by Sirius: BUG: unable to handle page fault for address: fffffffffffffff8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286 Call Trace: con_font_get drivers/tty/vt/vt.c:4558 [inline] con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673 vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline] vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752 tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803 vfs_ioctl fs/ioctl.c:51 [inline] ... So restore the font data in any case, not only for user fonts. Note the later 'if' is now protected by 'old_userfont' and not 'old_data' as the latter is always set now. (And it is supposed to be non-NULL. Otherwise we would see the bug above again.) Signed-off-by: Jiri Slaby (SUSE) Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed") Reported-and-tested-by: Ubisectech Sirius Cc: Ubisectech Sirius Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbcon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index fa205be94a4b8..14498a0d13e0b 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; -- GitLab From 058ed71e0f7aa3b6694ca357e23d084e5d3f2470 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Feb 2024 13:15:02 +0000 Subject: [PATCH 0472/1418] afs: Fix endless loop in directory parsing [ Upstream commit 5f7a07646655fb4108da527565dcdc80124b14c4 ] If a directory has a block with only ".__afsXXXX" files in it (from uncompleted silly-rename), these .__afsXXXX files are skipped but without advancing the file position in the dir_context. This leads to afs_dir_iterate() repeating the block again and again. Fix this by making the code that skips the .__afsXXXX file also manually advance the file position. The symptoms are a soft lookup: watchdog: BUG: soft lockup - CPU#3 stuck for 52s! [check:5737] ... RIP: 0010:afs_dir_iterate_block+0x39/0x1fd ... ? watchdog_timer_fn+0x1a6/0x213 ... ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? afs_dir_iterate_block+0x39/0x1fd afs_dir_iterate+0x10a/0x148 afs_readdir+0x30/0x4a iterate_dir+0x93/0xd3 __do_sys_getdents64+0x6b/0xd4 This is almost certainly the actual fix for: https://bugzilla.kernel.org/show_bug.cgi?id=218496 Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace") Signed-off-by: David Howells Link: https://lore.kernel.org/r/786185.1708694102@warthog.procyon.org.uk Reviewed-by: Marc Dionne cc: Marc Dionne cc: Markus Suvanto cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index cf811b77ee671..6e2c967fae6fc 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -478,8 +478,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, dire->u.name[0] == '.' && ctx->actor != afs_lookup_filldir && ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) + memcmp(dire->u.name, ".__afs", 6) == 0) { + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, -- GitLab From 8310080799b40fd9f2a8b808c657269678c149af Mon Sep 17 00:00:00 2001 From: Dimitris Vlachos Date: Thu, 29 Feb 2024 21:17:23 +0200 Subject: [PATCH 0473/1418] riscv: Sparse-Memory/vmemmap out-of-bounds fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a11dd49dcb9376776193e15641f84fcc1e5980c9 ] Offset vmemmap so that the first page of vmemmap will be mapped to the first page of physical memory in order to ensure that vmemmap’s bounds will be respected during pfn_to_page()/page_to_pfn() operations. The conversion macros will produce correct SV39/48/57 addresses for every possible/valid DRAM_BASE inside the physical memory limits. v2:Address Alex's comments Suggested-by: Alexandre Ghiti Signed-off-by: Dimitris Vlachos Reported-by: Dimitris Vlachos Closes: https://lore.kernel.org/linux-riscv/20240202135030.42265-1-csd4492@csd.uoc.gr Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240229191723.32779-1-dvlachos@ics.forth.gr Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 59bb53da473dd..63055c6ad2c25 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -79,7 +79,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START) +#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START -- GitLab From 8f626221e5fa89134515d358e7d614609b612a5c Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 23 Feb 2024 21:24:35 -0800 Subject: [PATCH 0474/1418] of: property: fw_devlink: Fix stupid bug in remote-endpoint parsing [ Upstream commit 7cb50f6c9fbaa1c0b80100b8971bf13db5d75d06 ] Introduced a stupid bug in commit 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") due to a last minute incorrect edit of "index !=0" into "!index". This patch fixes it to be "index > 0" to match the comment right next to it. Reported-by: Luca Ceresoli Link: https://lore.kernel.org/lkml/20240223171849.10f9901d@booty/ Fixes: 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") Signed-off-by: Saravana Kannan Reviewed-by: Herve Codina Reviewed-by: Luca Ceresoli Tested-by: Luca Ceresoli Link: https://lore.kernel.org/r/20240224052436.3552333-1-saravanak@google.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 33d5f16c81204..da5d712197704 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1332,7 +1332,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ - if (!index || strcmp(prop_name, "remote-endpoint")) + if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); -- GitLab From 3bfe04c1273d30b866f4c7c238331ed3b08e5824 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 1 Mar 2024 22:04:06 +0900 Subject: [PATCH 0475/1418] tomoyo: fix UAF write bug in tomoyo_write_control() commit 2f03fc340cac9ea1dc63cbf8c93dd2eb0f227815 upstream. Since tomoyo_write_control() updates head->write_buf when write() of long lines is requested, we need to fetch head->write_buf after head->io_sem is held. Otherwise, concurrent write() requests can cause use-after-free-write and double-free problems. Reported-by: Sam Sun Closes: https://lkml.kernel.org/r/CAEkJfYNDspuGxYx5kym8Lvp--D36CMDUErg4rxfWFJuPbbji8g@mail.gmail.com Fixes: bd03a3e4c9a9 ("TOMOYO: Add policy namespace support.") Cc: # Linux 3.1+ Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- security/tomoyo/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index f4cd9b58b2054..a7af085550b2d 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ -- GitLab From 8cec41a35065dcfcca5a2337f4edd56dadd1425c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 18 Feb 2024 12:30:26 +0900 Subject: [PATCH 0476/1418] ALSA: firewire-lib: fix to check cycle continuity commit 77ce96543b03f437c6b45f286d8110db2b6622a3 upstream. The local helper function to compare the given pair of cycle count evaluates them. If the left value is less than the right value, the function returns negative value. If the safe cycle is less than the current cycle, it is the case of cycle lost. However, it is not currently handled properly. This commit fixes the bug. Cc: Fixes: 705794c53b00 ("ALSA: firewire-lib: check cycle continuity") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20240218033026.72577-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 9be2260e4ca2d..f8b644cb9157a 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, // to the reason. unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); - lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); + lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); } if (lost) { dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", -- GitLab From 4cbbc2f0dbe22498e290997c52f088413d6b9ad5 Mon Sep 17 00:00:00 2001 From: Hans Peter Date: Mon, 19 Feb 2024 17:38:49 +0100 Subject: [PATCH 0477/1418] ALSA: hda/realtek: Enable Mute LED on HP 840 G8 (MB 8AB8) commit 1fdf4e8be7059e7784fec11d30cd32784f0bdc83 upstream. On my EliteBook 840 G8 Notebook PC (ProdId 5S7R6EC#ABD; built 2022 for german market) the Mute LED is always on. The mute button itself works as expected. alsa-info.sh shows a different subsystem-id 0x8ab9 for Realtek ALC285 Codec, thus the existing quirks for HP 840 G8 don't work. Therefore, add a new quirk for this type of EliteBook. Signed-off-by: Hans Peter Cc: Link: https://lore.kernel.org/r/20240219164518.4099-1-flurry123@gmx.ch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 92a656fb53212..53fe38a1b5da3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9687,6 +9687,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), -- GitLab From fd3289ab8ed1f8a2f6e3593adf39bb610fbc17a5 Mon Sep 17 00:00:00 2001 From: Eniac Zhang Date: Tue, 20 Feb 2024 17:58:12 +0000 Subject: [PATCH 0478/1418] ALSA: hda/realtek: fix mute/micmute LED For HP mt440 commit 67c3d7717efbd46092f217b1f811df1b205cce06 upstream. The HP mt440 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang Signed-off-by: Alexandru Gagniuc Cc: Link: https://lore.kernel.org/r/20240220175812.782687-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 53fe38a1b5da3..75bd7b2fa4ee6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9662,6 +9662,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), @@ -9693,6 +9694,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), -- GitLab From 59ed284c7bff4da0f6cafd05ca15de1c0ae1d087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 19 Feb 2024 20:03:45 +0100 Subject: [PATCH 0479/1418] landlock: Fix asymmetric private inodes referring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9818b3e906a0ee1ab02ea79e74a2f755fc5461a upstream. When linking or renaming a file, if only one of the source or destination directory is backed by an S_PRIVATE inode, then the related set of layer masks would be used as uninitialized by is_access_to_paths_allowed(). This would result to indeterministic access for one side instead of always being allowed. This bug could only be triggered with a mounted filesystem containing both S_PRIVATE and !S_PRIVATE inodes, which doesn't seem possible. The collect_domain_accesses() calls return early if is_nouser_or_private() returns false, which means that the directory's superblock has SB_NOUSER or its inode has S_PRIVATE. Because rename or link actions are only allowed on the same mounted filesystem, the superblock is always the same for both source and destination directories. However, it might be possible in theory to have an S_PRIVATE parent source inode with an !S_PRIVATE parent destination inode, or vice versa. To make sure this case is not an issue, explicitly initialized both set of layer masks to 0, which means to allow all actions on the related side. If at least on side has !S_PRIVATE, then collect_domain_accesses() and is_access_to_paths_allowed() check for the required access rights. Cc: Arnd Bergmann Cc: Christian Brauner Cc: Günther Noack Cc: Jann Horn Cc: Shervin Oloumi Cc: stable@vger.kernel.org Fixes: b91c3e4ea756 ("landlock: Add support for file reparenting with LANDLOCK_ACCESS_FS_REFER") Link: https://lore.kernel.org/r/20240219190345.2928627-1-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- security/landlock/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 64ed7665455fe..d328965f32f7f 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; -- GitLab From abd32d7f5c0294c1b2454c5a3b13b18446bac627 Mon Sep 17 00:00:00 2001 From: Alexander Ofitserov Date: Wed, 28 Feb 2024 14:47:03 +0300 Subject: [PATCH 0480/1418] gtp: fix use-after-free and null-ptr-deref in gtp_newlink() commit 616d82c3cfa2a2146dd7e3ae47bda7e877ee549e upstream. The gtp_link_ops operations structure for the subsystem must be registered after registering the gtp_net_ops pernet operations structure. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: [ 1010.702740] gtp: GTP module unloaded [ 1010.715877] general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] SMP KASAN NOPTI [ 1010.715888] KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] [ 1010.715895] CPU: 1 PID: 128616 Comm: a.out Not tainted 6.8.0-rc6-std-def-alt1 #1 [ 1010.715899] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 [ 1010.715908] RIP: 0010:gtp_newlink+0x4d7/0x9c0 [gtp] [ 1010.715915] Code: 80 3c 02 00 0f 85 41 04 00 00 48 8b bb d8 05 00 00 e8 ed f6 ff ff 48 89 c2 48 89 c5 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 4f 04 00 00 4c 89 e2 4c 8b 6d 00 48 b8 00 00 00 [ 1010.715920] RSP: 0018:ffff888020fbf180 EFLAGS: 00010203 [ 1010.715929] RAX: dffffc0000000000 RBX: ffff88800399c000 RCX: 0000000000000000 [ 1010.715933] RDX: 0000000000000001 RSI: ffffffff84805280 RDI: 0000000000000282 [ 1010.715938] RBP: 000000000000000d R08: 0000000000000001 R09: 0000000000000000 [ 1010.715942] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88800399cc80 [ 1010.715947] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000400 [ 1010.715953] FS: 00007fd1509ab5c0(0000) GS:ffff88805b300000(0000) knlGS:0000000000000000 [ 1010.715958] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1010.715962] CR2: 0000000000000000 CR3: 000000001c07a000 CR4: 0000000000750ee0 [ 1010.715968] PKRU: 55555554 [ 1010.715972] Call Trace: [ 1010.715985] ? __die_body.cold+0x1a/0x1f [ 1010.715995] ? die_addr+0x43/0x70 [ 1010.716002] ? exc_general_protection+0x199/0x2f0 [ 1010.716016] ? asm_exc_general_protection+0x1e/0x30 [ 1010.716026] ? gtp_newlink+0x4d7/0x9c0 [gtp] [ 1010.716034] ? gtp_net_exit+0x150/0x150 [gtp] [ 1010.716042] __rtnl_newlink+0x1063/0x1700 [ 1010.716051] ? rtnl_setlink+0x3c0/0x3c0 [ 1010.716063] ? is_bpf_text_address+0xc0/0x1f0 [ 1010.716070] ? kernel_text_address.part.0+0xbb/0xd0 [ 1010.716076] ? __kernel_text_address+0x56/0xa0 [ 1010.716084] ? unwind_get_return_address+0x5a/0xa0 [ 1010.716091] ? create_prof_cpu_mask+0x30/0x30 [ 1010.716098] ? arch_stack_walk+0x9e/0xf0 [ 1010.716106] ? stack_trace_save+0x91/0xd0 [ 1010.716113] ? stack_trace_consume_entry+0x170/0x170 [ 1010.716121] ? __lock_acquire+0x15c5/0x5380 [ 1010.716139] ? mark_held_locks+0x9e/0xe0 [ 1010.716148] ? kmem_cache_alloc_trace+0x35f/0x3c0 [ 1010.716155] ? __rtnl_newlink+0x1700/0x1700 [ 1010.716160] rtnl_newlink+0x69/0xa0 [ 1010.716166] rtnetlink_rcv_msg+0x43b/0xc50 [ 1010.716172] ? rtnl_fdb_dump+0x9f0/0x9f0 [ 1010.716179] ? lock_acquire+0x1fe/0x560 [ 1010.716188] ? netlink_deliver_tap+0x12f/0xd50 [ 1010.716196] netlink_rcv_skb+0x14d/0x440 [ 1010.716202] ? rtnl_fdb_dump+0x9f0/0x9f0 [ 1010.716208] ? netlink_ack+0xab0/0xab0 [ 1010.716213] ? netlink_deliver_tap+0x202/0xd50 [ 1010.716220] ? netlink_deliver_tap+0x218/0xd50 [ 1010.716226] ? __virt_addr_valid+0x30b/0x590 [ 1010.716233] netlink_unicast+0x54b/0x800 [ 1010.716240] ? netlink_attachskb+0x870/0x870 [ 1010.716248] ? __check_object_size+0x2de/0x3b0 [ 1010.716254] netlink_sendmsg+0x938/0xe40 [ 1010.716261] ? netlink_unicast+0x800/0x800 [ 1010.716269] ? __import_iovec+0x292/0x510 [ 1010.716276] ? netlink_unicast+0x800/0x800 [ 1010.716284] __sock_sendmsg+0x159/0x190 [ 1010.716290] ____sys_sendmsg+0x712/0x880 [ 1010.716297] ? sock_write_iter+0x3d0/0x3d0 [ 1010.716304] ? __ia32_sys_recvmmsg+0x270/0x270 [ 1010.716309] ? lock_acquire+0x1fe/0x560 [ 1010.716315] ? drain_array_locked+0x90/0x90 [ 1010.716324] ___sys_sendmsg+0xf8/0x170 [ 1010.716331] ? sendmsg_copy_msghdr+0x170/0x170 [ 1010.716337] ? lockdep_init_map_type+0x2c7/0x860 [ 1010.716343] ? lockdep_hardirqs_on_prepare+0x430/0x430 [ 1010.716350] ? debug_mutex_init+0x33/0x70 [ 1010.716360] ? percpu_counter_add_batch+0x8b/0x140 [ 1010.716367] ? lock_acquire+0x1fe/0x560 [ 1010.716373] ? find_held_lock+0x2c/0x110 [ 1010.716384] ? __fd_install+0x1b6/0x6f0 [ 1010.716389] ? lock_downgrade+0x810/0x810 [ 1010.716396] ? __fget_light+0x222/0x290 [ 1010.716403] __sys_sendmsg+0xea/0x1b0 [ 1010.716409] ? __sys_sendmsg_sock+0x40/0x40 [ 1010.716419] ? lockdep_hardirqs_on_prepare+0x2b3/0x430 [ 1010.716425] ? syscall_enter_from_user_mode+0x1d/0x60 [ 1010.716432] do_syscall_64+0x30/0x40 [ 1010.716438] entry_SYSCALL_64_after_hwframe+0x62/0xc7 [ 1010.716444] RIP: 0033:0x7fd1508cbd49 [ 1010.716452] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ef 70 0d 00 f7 d8 64 89 01 48 [ 1010.716456] RSP: 002b:00007fff18872348 EFLAGS: 00000202 ORIG_RAX: 000000000000002e [ 1010.716463] RAX: ffffffffffffffda RBX: 000055f72bf0eac0 RCX: 00007fd1508cbd49 [ 1010.716468] RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000006 [ 1010.716473] RBP: 00007fff18872360 R08: 00007fff18872360 R09: 00007fff18872360 [ 1010.716478] R10: 00007fff18872360 R11: 0000000000000202 R12: 000055f72bf0e1b0 [ 1010.716482] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 1010.716491] Modules linked in: gtp(+) udp_tunnel ib_core uinput af_packet rfkill qrtr joydev hid_generic usbhid hid kvm_intel iTCO_wdt intel_pmc_bxt iTCO_vendor_support kvm snd_hda_codec_generic ledtrig_audio irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel snd_hda_intel nls_utf8 snd_intel_dspcfg nls_cp866 psmouse aesni_intel vfat crypto_simd fat cryptd glue_helper snd_hda_codec pcspkr snd_hda_core i2c_i801 snd_hwdep i2c_smbus xhci_pci snd_pcm lpc_ich xhci_pci_renesas xhci_hcd qemu_fw_cfg tiny_power_button button sch_fq_codel vboxvideo drm_vram_helper drm_ttm_helper ttm vboxsf vboxguest snd_seq_midi snd_seq_midi_event snd_seq snd_rawmidi snd_seq_device snd_timer snd soundcore msr fuse efi_pstore dm_mod ip_tables x_tables autofs4 virtio_gpu virtio_dma_buf drm_kms_helper cec rc_core drm virtio_rng virtio_scsi rng_core virtio_balloon virtio_blk virtio_net virtio_console net_failover failover ahci libahci libata evdev scsi_mod input_leds serio_raw virtio_pci intel_agp [ 1010.716674] virtio_ring intel_gtt virtio [last unloaded: gtp] [ 1010.716693] ---[ end trace 04990a4ce61e174b ]--- Cc: stable@vger.kernel.org Signed-off-by: Alexander Ofitserov Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228114703.465107-1-oficerovas@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 937dd9cf4fbaf..7086acfed5b90 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1902,26 +1902,26 @@ static int __init gtp_init(void) get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(>p_link_ops); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto error_out; - err = register_pernet_subsys(>p_net_ops); + err = rtnl_link_register(>p_link_ops); if (err < 0) - goto unreg_rtnl_link; + goto unreg_pernet_subsys; err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_pernet_subsys; + goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_pernet_subsys: - unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; -- GitLab From 930e826962d9f01dcd2220176134427358d112f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Feb 2024 20:08:35 +0100 Subject: [PATCH 0481/1418] wifi: nl80211: reject iftype change with mesh ID change commit f78c1375339a291cba492a70eaf12ec501d28a8e upstream. It's currently possible to change the mesh ID when the interface isn't yet in mesh mode, at the same time as changing it into mesh mode. This leads to an overwrite of data in the wdev->u union for the interface type it currently has, causing cfg80211_change_iface() to do wrong things when switching. We could probably allow setting an interface to mesh while setting the mesh ID at the same time by doing a different order of operations here, but realistically there's no userspace that's going to do this, so just disallow changes in iftype when setting mesh ID. Cc: stable@vger.kernel.org Fixes: 29cbe68c516a ("cfg80211/mac80211: add mesh join/leave commands") Reported-by: syzbot+dd4779978217b1973180@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c259d3227a9e2..1a3bd554e2586 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4137,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; + if (otype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; if (netif_running(dev)) return -EBUSY; -- GitLab From c34adc20b91a8e55e048b18d63f4f4ae003ecf8f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 23 Feb 2024 16:38:43 +0000 Subject: [PATCH 0482/1418] btrfs: fix double free of anonymous device after snapshot creation failure commit e2b54eaf28df0c978626c9736b94f003b523b451 upstream. When creating a snapshot we may do a double free of an anonymous device in case there's an error committing the transaction. The second free may result in freeing an anonymous device number that was allocated by some other subsystem in the kernel or another btrfs filesystem. The steps that lead to this: 1) At ioctl.c:create_snapshot() we allocate an anonymous device number and assign it to pending_snapshot->anon_dev; 2) Then we call btrfs_commit_transaction() and end up at transaction.c:create_pending_snapshot(); 3) There we call btrfs_get_new_fs_root() and pass it the anonymous device number stored in pending_snapshot->anon_dev; 4) btrfs_get_new_fs_root() frees that anonymous device number because btrfs_lookup_fs_root() returned a root - someone else did a lookup of the new root already, which could some task doing backref walking; 5) After that some error happens in the transaction commit path, and at ioctl.c:create_snapshot() we jump to the 'fail' label, and after that we free again the same anonymous device number, which in the meanwhile may have been reallocated somewhere else, because pending_snapshot->anon_dev still has the same value as in step 1. Recently syzbot ran into this and reported the following trace: ------------[ cut here ]------------ ida_free called for id=51 which is not allocated. WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525 Modules linked in: CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525 Code: 10 42 80 3c 28 (...) RSP: 0018:ffffc90015a67300 EFLAGS: 00010246 RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000 RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4 R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246 R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246 FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0 Call Trace: btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346 create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837 create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931 btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404 create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848 btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998 btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044 __btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306 btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393 btrfs_ioctl+0xa74/0xd40 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fca3e67dda9 Code: 28 00 00 00 (...) RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9 RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003 RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658 Where we get an explicit message where we attempt to free an anonymous device number that is not currently allocated. It happens in a different code path from the example below, at btrfs_get_root_ref(), so this change may not fix the case triggered by syzbot. To fix at least the code path from the example above, change btrfs_get_root_ref() and its callers to receive a dev_t pointer argument for the anonymous device number, so that in case it frees the number, it also resets it to 0, so that up in the call chain we don't attempt to do the double free. CC: stable@vger.kernel.org # 5.10+ Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/ Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 22 +++++++++++----------- fs/btrfs/disk-io.h | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d1b05ded1e35..5756edb37c61e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1668,9 +1668,9 @@ again: * that common but still possible. In that case, we just need * to free the anon_dev. */ - if (unlikely(anon_dev)) { - free_anon_bdev(anon_dev); - anon_dev = 0; + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { @@ -1692,7 +1692,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1728,7 +1728,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1744,7 +1744,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1752,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7322af63c0cc7..24bddca86e9c9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 196e222749ccd..64b37afb7c87f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 60db4c3b82fa1..b172091f42612 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; -- GitLab From f590040ce2b712177306b03c2a63b16f7d48d3c8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 14 Feb 2024 16:19:24 +0100 Subject: [PATCH 0483/1418] btrfs: dev-replace: properly validate device names commit 9845664b9ee47ce7ee7ea93caf47d39a9d4552c4 upstream. There's a syzbot report that device name buffers passed to device replace are not properly checked for string termination which could lead to a read out of bounds in getname_kernel(). Add a helper that validates both source and target device name buffers. For devid as the source initialize the buffer to empty string in case something tries to read it later. This was originally analyzed and fixed in a different way by Edward Adam Davis (see links). Link: https://lore.kernel.org/linux-btrfs/000000000000d1a1d1060cc9c5e7@google.com/ Link: https://lore.kernel.org/linux-btrfs/tencent_44CA0665C9836EF9EEC80CB9E7E206DF5206@qq.com/ CC: stable@vger.kernel.org # 4.19+ CC: Edward Adam Davis Reported-and-tested-by: syzbot+33f23b49ac24f986c9e8@syzkaller.appspotmail.com Reviewed-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 61e58066b5fd2..9c856a73d5333 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -740,6 +740,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, -- GitLab From 444d70889d199b7f74eec45f14768a83c0b04d73 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 16 Feb 2024 22:17:10 +0000 Subject: [PATCH 0484/1418] btrfs: send: don't issue unnecessary zero writes for trailing hole commit 5897710b28cabab04ea6c7547f27b7989de646ae upstream. If we have a sparse file with a trailing hole (from the last extent's end to i_size) and then create an extent in the file that ends before the file's i_size, then when doing an incremental send we will issue a write full of zeroes for the range that starts immediately after the new extent ends up to i_size. While this isn't incorrect because the file ends up with exactly the same data, it unnecessarily results in using extra space at the destination with one or more extents full of zeroes instead of having a hole. In same cases this results in using megabytes or even gigabytes of unnecessary space. Example, reproducer: $ cat test.sh #!/bin/bash DEV=/dev/sdh MNT=/mnt/sdh mkfs.btrfs -f $DEV mount $DEV $MNT # Create 1G sparse file. xfs_io -f -c "truncate 1G" $MNT/foobar # Create base snapshot. btrfs subvolume snapshot -r $MNT $MNT/mysnap1 # Create send stream (full send) for the base snapshot. btrfs send -f /tmp/1.snap $MNT/mysnap1 # Now write one extent at the beginning of the file and one somewhere # in the middle, leaving a gap between the end of this second extent # and the file's size. xfs_io -c "pwrite -S 0xab 0 128K" \ -c "pwrite -S 0xcd 512M 128K" \ $MNT/foobar # Now create a second snapshot which is going to be used for an # incremental send operation. btrfs subvolume snapshot -r $MNT $MNT/mysnap2 # Create send stream (incremental send) for the second snapshot. btrfs send -p $MNT/mysnap1 -f /tmp/2.snap $MNT/mysnap2 # Now recreate the filesystem by receiving both send streams and # verify we get the same content that the original filesystem had # and file foobar has only two extents with a size of 128K each. umount $MNT mkfs.btrfs -f $DEV mount $DEV $MNT btrfs receive -f /tmp/1.snap $MNT btrfs receive -f /tmp/2.snap $MNT echo -e "\nFile fiemap in the second snapshot:" # Should have: # # 128K extent at file range [0, 128K[ # hole at file range [128K, 512M[ # 128K extent file range [512M, 512M + 128K[ # hole at file range [512M + 128K, 1G[ xfs_io -r -c "fiemap -v" $MNT/mysnap2/foobar # File should be using 256K of data (two 128K extents). echo -e "\nSpace used by the file: $(du -h $MNT/mysnap2/foobar | cut -f 1)" umount $MNT Running the test, we can see with fiemap that we get an extent for the range [512M, 1G[, while in the source filesystem we have an extent for the range [512M, 512M + 128K[ and a hole for the rest of the file (the range [512M + 128K, 1G[): $ ./test.sh (...) File fiemap in the second snapshot: /mnt/sdh/mysnap2/foobar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..255]: 26624..26879 256 0x0 1: [256..1048575]: hole 1048320 2: [1048576..2097151]: 2156544..3205119 1048576 0x1 Space used by the file: 513M This happens because once we finish processing an inode, at finish_inode_if_needed(), we always issue a hole (write operations full of zeros) if there's a gap between the end of the last processed extent and the file's size, even if that range is already a hole in the parent snapshot. Fix this by issuing the hole only if the range is not already a hole. After this change, running the test above, we get the expected layout: $ ./test.sh (...) File fiemap in the second snapshot: /mnt/sdh/mysnap2/foobar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..255]: 26624..26879 256 0x0 1: [256..1048575]: hole 1048320 2: [1048576..1048831]: 26880..27135 256 0x1 3: [1048832..2097151]: hole 1048320 Space used by the file: 256K A test case for fstests will follow soon. CC: stable@vger.kernel.org # 6.1+ Reported-by: Dorai Ashok S A Link: https://lore.kernel.org/linux-btrfs/c0bf7818-9c45-46a8-b3d3-513230d0c86e@inix.me/ Reviewed-by: Sweet Tea Dorminy Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a75669972dc73..9f7ffd9ef6fd7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { -- GitLab From 2e443ed55fe3ffb08327b331a9f45e9382413c94 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Aug 2023 15:06:00 -0400 Subject: [PATCH 0485/1418] Revert "drm/amd/pm: resolve reboot exception for si oland" commit 955558030954b9637b41c97b730f9b38c92ac488 upstream. This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index dc0a6fba7050f..ff1032de4f76d 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } -- GitLab From 8dafc066c54669384ce01b4bbdfe9708a085afb9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:52 +0000 Subject: [PATCH 0486/1418] drm/buddy: fix range bias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f41900e4a6ef019d64a70394b0e0c3bd048d4ec8 upstream. There is a corner case here where start/end is after/before the block range we are currently checking. If so we need to be sure that splitting the block will eventually give use the block size we need. To do that we should adjust the block range to account for the start/end, and only continue with the split if the size/alignment will fit the requested size. Not doing so can result in leaving split blocks unmerged when it eventually fails. Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function") Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: # v5.18+ Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-4-matthew.auld@intel.com Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_buddy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 7098f125b54a9..fd32041f82263 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* -- GitLab From 237ecf1afe6c22534fa43abdf2bf0b0f52de0aaa Mon Sep 17 00:00:00 2001 From: Peng Ma Date: Thu, 1 Feb 2024 16:50:07 -0500 Subject: [PATCH 0487/1418] dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read commit 9d739bccf261dd93ec1babf82f5c5d71dd4caa3e upstream. There is chip (ls1028a) errata: The SoC may hang on 16 byte unaligned read transactions by QDMA. Unaligned read transactions initiated by QDMA may stall in the NOC (Network On-Chip), causing a deadlock condition. Stalled transactions will trigger completion timeouts in PCIe controller. Workaround: Enable prefetch by setting the source descriptor prefetchable bit ( SD[PF] = 1 ). Implement this workaround. Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Peng Ma Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201215007.439503-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/fsl-qdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index f383f219ed008..915fa0cbb5539 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); + FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF; sdf->data = QDMA_SDDF_CMD(cmd); cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << -- GitLab From 034e2d70b5c7f578200ad09955aeb2aa65d1164a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 23 Feb 2024 14:20:35 +0100 Subject: [PATCH 0488/1418] crypto: arm64/neonbs - fix out-of-bounds access on short input commit 1c0cf6d19690141002889d72622b90fc01562ce4 upstream. The bit-sliced implementation of AES-CTR operates on blocks of 128 bytes, and will fall back to the plain NEON version for tail blocks or inputs that are shorter than 128 bytes to begin with. It will call straight into the plain NEON asm helper, which performs all memory accesses in granules of 16 bytes (the size of a NEON register). For this reason, the associated plain NEON glue code will copy inputs shorter than 16 bytes into a temporary buffer, given that this is a rare occurrence and it is not worth the effort to work around this in the asm code. The fallback from the bit-sliced NEON version fails to take this into account, potentially resulting in out-of-bounds accesses. So clone the same workaround, and use a temp buffer for short in/outputs. Fixes: fc074e130051 ("crypto: arm64/aes-neonbs-ctr - fallback to plain NEON for final chunk") Cc: Reported-by: syzbot+f1ceaa1a09ab891e1934@syzkaller.appspotmail.com Reviewed-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-neonbs-glue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bac4cabef6073..467ac2f768ac2 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) src += blocks * AES_BLOCK_SIZE; } if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, nbytes, walk.iv); + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); + nbytes = 0; } kernel_neon_end(); -- GitLab From 300111cd9042d133d1edd0255f50556211125ce9 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 22 Feb 2024 17:30:53 +0100 Subject: [PATCH 0489/1418] dmaengine: ptdma: use consistent DMA masks commit df2515a17914ecfc2a0594509deaf7fcb8d191ac upstream. The PTDMA driver sets DMA masks in two different places for the same device inconsistently. First call is in pt_pci_probe(), where it uses 48bit mask. The second call is in pt_dmaengine_register(), where it uses a 64bit mask. Using 64bit dma mask causes IO_PAGE_FAULT errors on DMA transfers between main memory and other devices. Without the extra call it works fine. Additionally the second call doesn't check the return value so it can silently fail. Remove the superfluous dma_set_mask() call and only use 48bit mask. Cc: stable@vger.kernel.org Fixes: b0b4a6b10577 ("dmaengine: ptdma: register PTDMA controller as a DMA resource") Reviewed-by: Basavaraj Natikar Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20240222163053.13842-1-tstruk@gigaio.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ptdma/ptdma-dmaengine.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de0f3a..f792407348077 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; -- GitLab From 474d521da890b3e3585335fb80a6044cb2553d99 Mon Sep 17 00:00:00 2001 From: Curtis Klein Date: Thu, 1 Feb 2024 17:04:06 -0500 Subject: [PATCH 0490/1418] dmaengine: fsl-qdma: init irq after reg initialization commit 87a39071e0b639f45e05d296cc0538eef44ec0bd upstream. Initialize the qDMA irqs after the registers are configured so that interrupts that may have been pending from a primary kernel don't get processed by the irq handler before it is ready to and cause panic with the following trace: Call trace: fsl_qdma_queue_handler+0xf8/0x3e8 __handle_irq_event_percpu+0x78/0x2b0 handle_irq_event_percpu+0x1c/0x68 handle_irq_event+0x44/0x78 handle_fasteoi_irq+0xc8/0x178 generic_handle_irq+0x24/0x38 __handle_domain_irq+0x90/0x100 gic_handle_irq+0x5c/0xb8 el1_irq+0xb8/0x180 _raw_spin_unlock_irqrestore+0x14/0x40 __setup_irq+0x4bc/0x798 request_threaded_irq+0xd8/0x190 devm_request_threaded_irq+0x74/0xe8 fsl_qdma_probe+0x4d4/0xca8 platform_drv_probe+0x50/0xa0 really_probe+0xe0/0x3f8 driver_probe_device+0x64/0x130 device_driver_attach+0x6c/0x78 __driver_attach+0xbc/0x158 bus_for_each_dev+0x5c/0x98 driver_attach+0x20/0x28 bus_add_driver+0x158/0x220 driver_register+0x60/0x110 __platform_driver_register+0x44/0x50 fsl_qdma_driver_init+0x18/0x20 do_one_initcall+0x48/0x258 kernel_init_freeable+0x1a4/0x23c kernel_init+0x10/0xf8 ret_from_fork+0x10/0x18 Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Curtis Klein Signed-off-by: Yi Zhao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201220406.440145-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/fsl-qdma.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 915fa0cbb5539..7082a5a6814a4 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -1203,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1245,16 +1241,19 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); + ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); + dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } - ret = fsl_qdma_reg_init(fsl_qdma); + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); if (ret) { - dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); return ret; } -- GitLab From 70af82bb9c897faa25a44e4181f36c60312b71ef Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Wed, 7 Feb 2024 15:39:51 +0100 Subject: [PATCH 0491/1418] mmc: mmci: stm32: fix DMA API overlapping mappings warning commit 6b1ba3f9040be5efc4396d86c9752cdc564730be upstream. Turning on CONFIG_DMA_API_DEBUG_SG results in the following warning: DMA-API: mmci-pl18x 48220000.mmc: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 1 PID: 51 at kernel/dma/debug.c:568 add_dma_entry+0x234/0x2f4 Modules linked in: CPU: 1 PID: 51 Comm: kworker/1:2 Not tainted 6.1.28 #1 Hardware name: STMicroelectronics STM32MP257F-EV1 Evaluation Board (DT) Workqueue: events_freezable mmc_rescan Call trace: add_dma_entry+0x234/0x2f4 debug_dma_map_sg+0x198/0x350 __dma_map_sg_attrs+0xa0/0x110 dma_map_sg_attrs+0x10/0x2c sdmmc_idma_prep_data+0x80/0xc0 mmci_prep_data+0x38/0x84 mmci_start_data+0x108/0x2dc mmci_request+0xe4/0x190 __mmc_start_request+0x68/0x140 mmc_start_request+0x94/0xc0 mmc_wait_for_req+0x70/0x100 mmc_send_tuning+0x108/0x1ac sdmmc_execute_tuning+0x14c/0x210 mmc_execute_tuning+0x48/0xec mmc_sd_init_uhs_card.part.0+0x208/0x464 mmc_sd_init_card+0x318/0x89c mmc_attach_sd+0xe4/0x180 mmc_rescan+0x244/0x320 DMA API debug brings to light leaking dma-mappings as dma_map_sg and dma_unmap_sg are not correctly balanced. If an error occurs in mmci_cmd_irq function, only mmci_dma_error function is called and as this API is not managed on stm32 variant, dma_unmap_sg is never called in this error path. Signed-off-by: Christophe Kerello Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240207143951.938144-1-christophe.kerello@foss.st.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmci_stm32_sdmmc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 60bca78a72b19..0511583ffa764 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, -- GitLab From bc9f87a41d185d7678c5742a4f5952df04bf2375 Mon Sep 17 00:00:00 2001 From: Ivan Semenov Date: Tue, 6 Feb 2024 19:28:45 +0200 Subject: [PATCH 0492/1418] mmc: core: Fix eMMC initialization with 1-bit bus connection commit ff3206d2186d84e4f77e1378ba1d225633f17b9b upstream. Initializing an eMMC that's connected via a 1-bit bus is current failing, if the HW (DT) informs that 4-bit bus is supported. In fact this is a regression, as we were earlier capable of falling back to 1-bit mode, when switching to 4/8-bit bus failed. Therefore, let's restore the behaviour. Log for Samsung eMMC 5.1 chip connected via 1bit bus (only D0 pin) Before patch: [134509.044225] mmc0: switch to bus width 4 failed [134509.044509] mmc0: new high speed MMC card at address 0001 [134509.054594] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134509.281602] mmc0: switch to bus width 4 failed [134509.282638] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.282657] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284598] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.284602] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284609] ldm_validate_partition_table(): Disk read failed. [134509.286495] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.286500] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.288303] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.288308] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289540] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.289544] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289553] mmcblk0: unable to read partition table [134509.289728] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134509.290283] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB [134509.294577] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 [134509.295835] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.295841] Buffer I/O error on dev mmcblk0, logical block 0, async page read After patch: [134551.089613] mmc0: switch to bus width 4 failed [134551.090377] mmc0: new high speed MMC card at address 0001 [134551.102271] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134551.113365] mmcblk0: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 p13 p14 p15 p16 p17 p18 p19 p20 p21 [134551.114262] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134551.114925] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB Fixes: 577fb13199b1 ("mmc: rework selection of bus speed mode") Cc: stable@vger.kernel.org Signed-off-by: Ivan Semenov Link: https://lore.kernel.org/r/20240206172845.34316-1-ivan@semenov.dev Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a46ce0868fe1f..3a927452a6501 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1007,10 +1007,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; -- GitLab From c65c475560851291ad64272d4b85b55e70c4adbb Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 21:17:14 +0200 Subject: [PATCH 0493/1418] mmc: sdhci-xenon: add timeout for PHY init complete commit 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 upstream. AC5X spec says PHY init complete bit must be polled until zero. We see cases in which timeout can take longer than the standard calculation on AC5X, which is expected following the spec comment above. According to the spec, we must wait as long as it takes for that bit to toggle on AC5X. Cap that with 100 delay loops so we won't get stuck forever. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-xenon-phy.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375de659..52a8e1217124a 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -109,6 +109,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -259,18 +261,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. + * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 -- GitLab From 4974d928d5e3909bd8cfe4b0bca2509636a8ebf2 Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 22:09:30 +0200 Subject: [PATCH 0494/1418] mmc: sdhci-xenon: fix PHY init clock stability commit 8e9f25a290ae0016353c9ea13314c95fb3207812 upstream. Each time SD/mmc phy is initialized, at times, in some of the attempts, phy fails to completes its initialization which results into timeout error. Per the HW spec, it is a pre-requisite to ensure a stable SD clock before a phy initialization is attempted. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-xenon-phy.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 52a8e1217124a..cc9d28b75eb91 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "sdhci-pltfm.h" @@ -218,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -234,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); -- GitLab From 76109a226a39aea5d621b9b0af04ba23fc9cf7de Mon Sep 17 00:00:00 2001 From: Zong Li Date: Fri, 2 Feb 2024 01:51:02 +0000 Subject: [PATCH 0495/1418] riscv: add CALLER_ADDRx support commit 680341382da56bd192ebfa4e58eaf4fec2e5bca7 upstream. CALLER_ADDRx returns caller's address at specified level, they are used for several tracers. These macros eventually use __builtin_return_address(n) to get the caller's address if arch doesn't define their own implementation. In RISC-V, __builtin_return_address(n) only works when n == 0, we need to walk the stack frame to get the caller's address at specified level. data.level started from 'level + 3' due to the call flow of getting caller's address in RISC-V implementation. If we don't have additional three iteration, the level is corresponding to follows: callsite -> return_address -> arch_stack_walk -> walk_stackframe | | | | level 3 level 2 level 1 level 0 Fixes: 10626c32e382 ("riscv/ftrace: Add basic support") Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Signed-off-by: Zong Li Link: https://lore.kernel.org/r/20240202015102.26251-1-zong.li@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/ftrace.h | 5 ++++ arch/riscv/kernel/Makefile | 2 ++ arch/riscv/kernel/return_address.c | 48 ++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 arch/riscv/kernel/return_address.c diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d47d87c2d7e3d..dcf1bc9de5841 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -25,6 +25,11 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ + +extern void *return_address(unsigned int level); + +#define ftrace_return_address(n) return_address(n) + void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index ab333cb792fd9..4c0805d264ca8 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) @@ -41,6 +42,7 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 0000000000000..c8115ec8fb304 --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include +#include +#include + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); -- GitLab From 249d6ca4ff0022a4b51a8eb9fac6d7bff2c94d1b Mon Sep 17 00:00:00 2001 From: Tim Schumacher Date: Fri, 26 Jan 2024 17:25:23 +0100 Subject: [PATCH 0496/1418] efivarfs: Request at most 512 bytes for variable names commit f45812cc23fb74bef62d4eb8a69fe7218f4b9f2a upstream. Work around a quirk in a few old (2011-ish) UEFI implementations, where a call to `GetNextVariableName` with a buffer size larger than 512 bytes will always return EFI_INVALID_PARAMETER. There is some lore around EFI variable names being up to 1024 bytes in size, but this has no basis in the UEFI specification, and the upper bounds are typically platform specific, and apply to the entire variable (name plus payload). Given that Linux does not permit creating files with names longer than NAME_MAX (255) bytes, 512 bytes (== 256 UTF-16 characters) is a reasonable limit. Cc: # 6.1+ Signed-off-by: Tim Schumacher Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- fs/efivarfs/vars.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 9e4f47808bd5a..13bc606989557 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), goto free; /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. */ do { - variable_name_size = 1024; + variable_name_size = 512; status = efivar_get_next_variable(&variable_name_size, variable_name, @@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } -- GitLab From 396a4120011d8d574eb57793efb0eec5f271a2c8 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 26 Feb 2024 17:49:57 -0800 Subject: [PATCH 0497/1418] pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation commit 2a93c6cbd5a703d44c414a3c3945a87ce11430ba upstream. Commit 'e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain")' aimed to make sure that a power-domain that is being enabled without any particular performance-state requested will at least turn the rail on, to avoid filling DeviceTree with otherwise unnecessary required-opps properties. But in the event that aggregation happens on a disabled power-domain, with an enabled peer without performance-state, both the local and peer corner are 0. The peer's enabled_corner is not considered, with the result that the underlying (shared) resource is disabled. One case where this can be observed is when the display stack keeps mmcx enabled (but without a particular performance-state vote) in order to access registers and sync_state happens in the rpmhpd driver. As mmcx_ao is flushed the state of the peer (mmcx) is not considered and mmcx_ao ends up turning off "mmcx.lvl" underneath mmcx. This has been observed several times, but has been painted over in DeviceTree by adding an explicit vote for the lowest non-disabled performance-state. Fixes: e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain") Reported-by: Johan Hovold Closes: https://lore.kernel.org/linux-arm-msm/ZdMwZa98L23mu3u6@hovoldconsulting.com/ Cc: Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240226-rpmhpd-enable-corner-fix-v1-1-68c004cec48c@quicinc.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/rpmhpd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 092f6ab09acf3..9a90f241bb97f 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -492,12 +492,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); -- GitLab From c9fa51d4c434fa7bdafd0c7a9e19cf9023787fd4 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 31 Jan 2024 01:04:28 +0100 Subject: [PATCH 0498/1418] x86/e820: Don't reserve SETUP_RNG_SEED in e820 commit 7fd817c906503b6813ea3b41f5fdf4192449a707 upstream. SETUP_RNG_SEED in setup_data is supplied by kexec and should not be reserved in the e820 map. Doing so reserves 16 bytes of RAM when booting with kexec. (16 bytes because data->len is zeroed by parse_setup_data so only sizeof(setup_data) is reserved.) When kexec is used repeatedly, each boot adds two entries in the kexec-provided e820 map as the 16-byte range splits a larger range of usable memory. Eventually all of the 128 available entries get used up. The next split will result in losing usable memory as the new entries cannot be added to the e820 map. Fixes: 68b8e9713c8e ("x86/setup: Use rng seeds from setup_data") Signed-off-by: Jiri Bohac Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/ZbmOjKnARGiaYBd5@dwarf.suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/e820.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9dac24680ff8e..993734e96615a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* - * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need - * to be reserved. + * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by + * kexec and do not need to be reserved. */ - if (data->type != SETUP_EFI && data->type != SETUP_IMA) + if (data->type != SETUP_EFI && + data->type != SETUP_IMA && + data->type != SETUP_RNG_SEED) e820__range_update_kexec(pa_data, sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); -- GitLab From 65742f4bb1f919caa564b8a20b15b8cdd6eca2ef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Feb 2024 00:09:02 +0100 Subject: [PATCH 0499/1418] x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers commit 6890cb1ace350b4386c8aee1343dc3b3ddd214da upstream. MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits. detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup. This fixes boot on TDX-enabled systems, which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo. Reported-by: Zixi Chen Signed-off-by: Paolo Bonzini Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240131230902.1867092-3-pbonzini%40redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 178 ++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 87 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4278996504833..32bd640170475 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci) } EXPORT_SYMBOL_GPL(intel_cpu_collect_info); +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_ia32_feat_ctl(c); - if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c); split_lock_init(); -- GitLab From e6e04845c2e8af9fef7d58439e9f62a3ed93f33b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:31 +0100 Subject: [PATCH 0500/1418] mptcp: fix data races on local_id commit a7cfe776637004a4c938fde78be4bd608c32c3ef upstream. The local address id is accessed lockless by the NL PM, add all the required ONCE annotation. There is a caveat: the local id can be initialized late in the subflow life-cycle, and its validity is controlled by the local_id_valid flag. Remove such flag and encode the validity in the local_id field itself with negative value before initialization. That allows accessing the field consistently with a single read operation. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/diag.c | 2 +- net/mptcp/pm_netlink.c | 6 +++--- net/mptcp/pm_userspace.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 13 +++++++++++-- net/mptcp/subflow.c | 9 +++++---- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index e57c5f47f0351..6ff6f14674aa2 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -65,7 +65,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 70a1025f093cf..3632f4830420a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -799,7 +799,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) continue; @@ -808,7 +808,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, + i, rm_id, id, subflow->remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -2028,7 +2028,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 631fa104617c3..67eccc141a6c1 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -233,7 +233,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { - if (subflow->local_id == 0) { + if (READ_ONCE(subflow->local_id) == 0) { has_id_0 = true; break; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 859b18cb8e4f6..cdabb00648bd2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->request_mptcp = 1; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); return 0; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b092205213234..2bc37773e7803 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -475,7 +475,6 @@ struct mptcp_subflow_context { can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; u32 remote_nonce; @@ -483,7 +482,7 @@ struct mptcp_subflow_context { u32 local_nonce; u32 remote_token; u8 hmac[MPTCPOPT_HMAC_LEN]; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -909,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 45d20e20cfc00..83bc438b98257 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -489,8 +489,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1867,7 +1868,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; -- GitLab From e64148635509bf13eea851986f5a0b150e5bd066 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:32 +0100 Subject: [PATCH 0501/1418] mptcp: fix data races on remote_id commit 967d3c27127e71a10ff5c083583a038606431b61 upstream. Similar to the previous patch, address the data race on remote_id, adding the suitable ONCE annotations. Fixes: bedee0b56113 ("mptcp: address lookup improvements") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/subflow.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3632f4830420a..582d0c641ed14 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -449,7 +449,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; @@ -798,18 +798,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 83bc438b98257..891c2f4fed080 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); @@ -1874,7 +1874,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; -- GitLab From fbccc5eb1652b6c4ff446f34eb5a18869b7f4f3b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:33 +0100 Subject: [PATCH 0502/1418] mptcp: fix duplicate subflow creation commit 045e9d812868a2d80b7a57b224ce8009444b7bbc upstream. Fullmesh endpoints could end-up unexpectedly generating duplicate subflows - same local and remote addresses - when multiple incoming ADD_ADDR are processed before the PM creates the subflow for the local endpoints. Address the issue explicitly checking for duplicates at subflow creation time. To avoid a quadratic computational complexity, track the unavailable remote address ids in a temporary bitmap and initialize such bitmap with the remote ids of all the existing subflows matching the local address currently processed. The above allows additionally replacing the existing code checking for duplicate entry in the current set with a simple bit test operation. Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/435 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 582d0c641ed14..3328870b0c1f8 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -407,23 +407,12 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ -static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *local, + bool fullmesh, struct mptcp_addr_info *addrs) { bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); @@ -446,6 +435,16 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); @@ -453,8 +452,11 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm if (deny_id0 && !addrs[i].id) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } @@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - nr = fill_remote_addresses_vec(msk, fullmesh, addrs); + nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); if (nr) __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); spin_unlock_bh(&msk->pm.lock); -- GitLab From 53e3f2ee8a0ce7fb33488325a74b678ddf74632a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Feb 2024 18:21:21 +0100 Subject: [PATCH 0503/1418] mptcp: continue marking the first subflow as UNCONNECTED After the 'Fixes' commit mentioned below, which is a partial backport, the MPTCP worker was no longer marking the first subflow as "UNCONNECTED" when the socket was transitioning to TCP_CLOSE state. As a result, in v6.1, it was no longer possible to reconnect to the just disconnected socket. Continue to do that like before, only for the first subflow. A few refactoring have been done around the 'msk->subflow' in later versions, and it looks like this is not needed to do that there, but still needed in v6.1. Without that, the 'disconnect' tests from the mptcp_connect.sh selftest fail: they repeat the transfer 3 times by reconnecting to the server each time. Fixes: 7857e35ef10e ("mptcp: get rid of msk->subflow") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cdabb00648bd2..125825db642cc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2440,6 +2440,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { __mptcp_subflow_disconnect(ssk, subflow, flags); + if (msk->subflow && ssk == msk->subflow->sk) + msk->subflow->state = SS_UNCONNECTED; release_sock(ssk); goto out; -- GitLab From fb7be5e5ec265a47f6763b6d772873da78bb09d0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 17:14:11 +0100 Subject: [PATCH 0504/1418] mptcp: map v4 address to v6 when destroying subflow commit 535d620ea5ff1a033dc64ee3d912acadc7470619 upstream. Address family of server side mismatches with that of client side, like in "userspace pm add & remove address" test: userspace_pm_add_addr $ns1 10.0.2.1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED That's because on the server side, the family is set to AF_INET6 and the v4 address is mapped in a v6 one. This patch fixes this issue. In mptcp_pm_nl_subflow_destroy_doit(), before checking local address family with remote address family, map an IPv4 address to an IPv6 address if the pair is a v4-mapped address. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/387 Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-1-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 67eccc141a6c1..414ed70e7ba2e 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -489,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) goto destroy_err; } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); + addr_l.family = AF_INET6; + } + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + addr_r.family = AF_INET6; + } +#endif if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; -- GitLab From 84a3c10a0c79ede027b030f61a89b6ab7cf98226 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 23 Feb 2024 17:14:14 +0100 Subject: [PATCH 0505/1418] mptcp: push at DSS boundaries commit b9cd26f640a308ea314ad23532de9a8592cd09d2 upstream. when inserting not contiguous data in the subflow write queue, the protocol creates a new skb and prevent the TCP stack from merging it later with already queued skbs by setting the EOR marker. Still no push flag is explicitly set at the end of previous GSO packet, making the aggregation on the receiver side sub-optimal - and packetdrill self-tests less predictable. Explicitly mark the end of not contiguous DSS with the push flag. Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-4-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 125825db642cc..be53e5460962e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext = skb_ext_find(skb, SKB_EXT_MPTCP); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; + tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } -- GitLab From 03ad085eb14db2ddc4de5d9474426d258dc53954 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 17:14:17 +0100 Subject: [PATCH 0506/1418] selftests: mptcp: join: add ss mptcp support check commit 9480f388a2ef54fba911d9325372abd69a328601 upstream. Commands 'ss -M' are used in script mptcp_join.sh to display only MPTCP sockets. So it must be checked if ss tool supports MPTCP in this script. Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-7-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2107579e2939d..a20dca9d26d68 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -144,6 +144,11 @@ check_tools() exit $ksft_skip fi + if ! ss -h | grep -q MPTCP; then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip + fi + # Use the legacy version if available to support old kernel versions if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy" -- GitLab From a8722cece375838f7067aa929d89a819f7d1ae96 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 23 Feb 2024 17:14:15 +0100 Subject: [PATCH 0507/1418] mptcp: fix snd_wnd initialization for passive socket commit adf1bb78dab55e36d4d557aa2fb446ebcfe9e5ce upstream. Such value should be inherited from the first subflow, but passive sockets always used 'rsk_rcv_wnd'. Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-5-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index be53e5460962e..1fc5f6649e32d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3203,7 +3203,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; msk->snd_una = msk->write_seq; - msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { -- GitLab From d93fd40c62397326046902a2c5cb75af50882a85 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 23 Feb 2024 17:14:18 +0100 Subject: [PATCH 0508/1418] mptcp: fix double-free on socket dismantle commit 10048689def7e40a4405acda16fdc6477d4ecc5c upstream. when MPTCP server accepts an incoming connection, it clones its listener socket. However, the pointer to 'inet_opt' for the new socket has the same value as the original one: as a consequence, on program exit it's possible to observe the following splat: BUG: KASAN: double-free in inet_sock_destruct+0x54f/0x8b0 Free of addr ffff888485950880 by task swapper/25/0 CPU: 25 PID: 0 Comm: swapper/25 Kdump: loaded Not tainted 6.8.0-rc1+ #609 Hardware name: Supermicro SYS-6027R-72RF/X9DRH-7TF/7F/iTF/iF, BIOS 3.0 07/26/2013 Call Trace: dump_stack_lvl+0x32/0x50 print_report+0xca/0x620 kasan_report_invalid_free+0x64/0x90 __kasan_slab_free+0x1aa/0x1f0 kfree+0xed/0x2e0 inet_sock_destruct+0x54f/0x8b0 __sk_destruct+0x48/0x5b0 rcu_do_batch+0x34e/0xd90 rcu_core+0x559/0xac0 __do_softirq+0x183/0x5a4 irq_exit_rcu+0x12d/0x170 sysvec_apic_timer_interrupt+0x6b/0x80 asm_sysvec_apic_timer_interrupt+0x16/0x20 RIP: 0010:cpuidle_enter_state+0x175/0x300 Code: 30 00 0f 84 1f 01 00 00 83 e8 01 83 f8 ff 75 e5 48 83 c4 18 44 89 e8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc fb 45 85 ed <0f> 89 60 ff ff ff 48 c1 e5 06 48 c7 43 18 00 00 00 00 48 83 44 2b RSP: 0018:ffff888481cf7d90 EFLAGS: 00000202 RAX: 0000000000000000 RBX: ffff88887facddc8 RCX: 0000000000000000 RDX: 1ffff1110ff588b1 RSI: 0000000000000019 RDI: ffff88887fac4588 RBP: 0000000000000004 R08: 0000000000000002 R09: 0000000000043080 R10: 0009b02ea273363f R11: ffff88887fabf42b R12: ffffffff932592e0 R13: 0000000000000004 R14: 0000000000000000 R15: 00000022c880ec80 cpuidle_enter+0x4a/0xa0 do_idle+0x310/0x410 cpu_startup_entry+0x51/0x60 start_secondary+0x211/0x270 secondary_startup_64_no_verify+0x184/0x18b Allocated by task 6853: kasan_save_stack+0x1c/0x40 kasan_save_track+0x10/0x30 __kasan_kmalloc+0xa6/0xb0 __kmalloc+0x1eb/0x450 cipso_v4_sock_setattr+0x96/0x360 netlbl_sock_setattr+0x132/0x1f0 selinux_netlbl_socket_post_create+0x6c/0x110 selinux_socket_post_create+0x37b/0x7f0 security_socket_post_create+0x63/0xb0 __sock_create+0x305/0x450 __sys_socket_create.part.23+0xbd/0x130 __sys_socket+0x37/0xb0 __x64_sys_socket+0x6f/0xb0 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Freed by task 6858: kasan_save_stack+0x1c/0x40 kasan_save_track+0x10/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x12c/0x1f0 kfree+0xed/0x2e0 inet_sock_destruct+0x54f/0x8b0 __sk_destruct+0x48/0x5b0 subflow_ulp_release+0x1f0/0x250 tcp_cleanup_ulp+0x6e/0x110 tcp_v4_destroy_sock+0x5a/0x3a0 inet_csk_destroy_sock+0x135/0x390 tcp_fin+0x416/0x5c0 tcp_data_queue+0x1bc8/0x4310 tcp_rcv_state_process+0x15a3/0x47b0 tcp_v4_do_rcv+0x2c1/0x990 tcp_v4_rcv+0x41fb/0x5ed0 ip_protocol_deliver_rcu+0x6d/0x9f0 ip_local_deliver_finish+0x278/0x360 ip_local_deliver+0x182/0x2c0 ip_rcv+0xb5/0x1c0 __netif_receive_skb_one_core+0x16e/0x1b0 process_backlog+0x1e3/0x650 __napi_poll+0xa6/0x500 net_rx_action+0x740/0xbb0 __do_softirq+0x183/0x5a4 The buggy address belongs to the object at ffff888485950880 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 0 bytes inside of 64-byte region [ffff888485950880, ffff8884859508c0) The buggy address belongs to the physical page: page:0000000056d1e95e refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888485950700 pfn:0x485950 flags: 0x57ffffc0000800(slab|node=1|zone=2|lastcpupid=0x1fffff) page_type: 0xffffffff() raw: 0057ffffc0000800 ffff88810004c640 ffffea00121b8ac0 dead000000000006 raw: ffff888485950700 0000000000200019 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888485950780: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff888485950800: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff888485950880: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff888485950900: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff888485950980: 00 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc Something similar (a refcount underflow) happens with CALIPSO/IPv6. Fix this by duplicating IP / IPv6 options after clone, so that ip{,6}_sock_destruct() doesn't end up freeing the same memory area twice. Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Cc: stable@vger.kernel.org Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-8-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1fc5f6649e32d..3bc21581486ae 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3169,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, @@ -3191,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; -- GitLab From f27d319df055629480b84b9288a502337b6f2a2e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 23 Feb 2024 17:14:19 +0100 Subject: [PATCH 0509/1418] mptcp: fix possible deadlock in subflow diag commit d6a9608af9a75d13243d217f6ce1e30e57d56ffe upstream. Syzbot and Eric reported a lockdep splat in the subflow diag: WARNING: possible circular locking dependency detected 6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Not tainted syz-executor.2/24141 is trying to acquire lock: ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 but task is already holding lock: ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: inet_diag_dump_icsk+0x39f/0x1f80 net/ipv4/inet_diag.c:1038 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&h->lhash2[i].lock){+.+.}-{2:2}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __inet_hash+0x335/0xbe0 net/ipv4/inet_hashtables.c:743 inet_csk_listen_start+0x23a/0x320 net/ipv4/inet_connection_sock.c:1261 __inet_listen_sk+0x2a2/0x770 net/ipv4/af_inet.c:217 inet_listen+0xa3/0x110 net/ipv4/af_inet.c:239 rds_tcp_listen_init+0x3fd/0x5a0 net/rds/tcp_listen.c:316 rds_tcp_init_net+0x141/0x320 net/rds/tcp.c:577 ops_init+0x352/0x610 net/core/net_namespace.c:136 __register_pernet_operations net/core/net_namespace.c:1214 [inline] register_pernet_operations+0x2cb/0x660 net/core/net_namespace.c:1283 register_pernet_device+0x33/0x80 net/core/net_namespace.c:1370 rds_tcp_init+0x62/0xd0 net/rds/tcp.c:735 do_one_initcall+0x238/0x830 init/main.c:1236 do_initcall_level+0x157/0x210 init/main.c:1298 do_initcalls+0x3f/0x80 init/main.c:1314 kernel_init_freeable+0x42f/0x5d0 init/main.c:1551 kernel_init+0x1d/0x2a0 init/main.c:1441 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 -> #0 (k-sk_lock-AF_INET6){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 lock_sock_fast include/net/sock.h:1723 [inline] subflow_get_info+0x166/0xd20 net/mptcp/diag.c:28 tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 inet_sk_diag_fill+0x10ed/0x1e00 net/ipv4/inet_diag.c:345 inet_diag_dump_icsk+0x55b/0x1f80 net/ipv4/inet_diag.c:1061 __inet_diag_dump+0x211/0x3a0 net/ipv4/inet_diag.c:1263 inet_diag_dump_compat+0x1c1/0x2d0 net/ipv4/inet_diag.c:1371 netlink_dump+0x59b/0xc80 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5df/0x790 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] inet_diag_rcv_msg_compat+0x209/0x4c0 net/ipv4/inet_diag.c:1405 sock_diag_rcv_msg+0xe7/0x410 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 As noted by Eric we can break the lock dependency chain avoid dumping any extended info for the mptcp subflow listener: nothing actually useful is presented there. Fixes: b8adb69a7d29 ("mptcp: fix lockless access in subflow ULP diag") Cc: stable@vger.kernel.org Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89iJ=Oecw6OZDwmSYc9HJKQ_G32uN11L+oUcMu+TOD5Xiaw@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-9-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 6ff6f14674aa2..7017dd60659dc 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; -- GitLab From 88067197e97af3fcb104dd86030f788ec1b32fdb Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 4 Jan 2023 10:01:38 +0200 Subject: [PATCH 0510/1418] RDMA/core: Refactor rdma_bind_addr commit 8d037973d48c026224ab285e6a06985ccac6f7bf upstream. Refactor rdma_bind_addr function so that it doesn't require that the cma destination address be changed before calling it. So now it will update the destination address internally only when it is really needed and after passing all the required checks. Which in turn results in a cleaner and more sensible call and error handling flows for the functions that call it directly or indirectly. Signed-off-by: Patrisious Haddad Reported-by: Wei Chen Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/3d0e9a2fd62bc10ba02fed1c7c48a48638952320.1672819273.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 253 +++++++++++++++++----------------- 1 file changed, 130 insertions(+), 123 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0773ca7ace247..950c8422aec29 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3547,121 +3547,6 @@ err: return ret; } -static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - struct sockaddr_storage zero_sock = {}; - - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - - /* - * When the src_addr is not specified, automatically supply an any addr - */ - zero_sock.ss_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = - (struct sockaddr_in6 *)&zero_sock; - struct sockaddr_in6 *dst_addr6 = - (struct sockaddr_in6 *)dst_addr; - - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = - dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *)&zero_sock)->sib_pkey = - ((struct sockaddr_ib *)dst_addr)->sib_pkey; - } - return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); -} - -/* - * If required, resolve the source address for bind and leave the id_priv in - * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior - * calls made by ULP, a previously bound ID will not be re-bound and src_addr is - * ignored. - */ -static int resolve_prepare_src(struct rdma_id_private *id_priv, - struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - int ret; - - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - /* For a well behaved ULP state will be RDMA_CM_IDLE */ - ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); - if (ret) - goto err_dst; - if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, - RDMA_CM_ADDR_QUERY))) { - ret = -EINVAL; - goto err_dst; - } - } - - if (cma_family(id_priv) != dst_addr->sa_family) { - ret = -EINVAL; - goto err_state; - } - return 0; - -err_state: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); -err_dst: - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return ret; -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) -{ - struct rdma_id_private *id_priv = - container_of(id, struct rdma_id_private, id); - int ret; - - ret = resolve_prepare_src(id_priv, src_addr, dst_addr); - if (ret) - return ret; - - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - /* - * The FSM can return back to RDMA_CM_ADDR_BOUND after - * rdma_resolve_ip() is called, eg through the error - * path in addr_handler(). If this happens the existing - * request must be canceled before issuing a new one. - * Since canceling a request is a bit slow and this - * oddball path is rare, keep track once a request has - * been issued. The track turns out to be a permanent - * state since this is the only cancel as it is - * immediately before rdma_resolve_ip(). - */ - if (id_priv->used_resolve_ip) - rdma_addr_cancel(&id->route.addr.dev_addr); - else - id_priv->used_resolve_ip = 1; - ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, - &id->route.addr.dev_addr, - timeout_ms, addr_handler, - false, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; @@ -4064,27 +3949,26 @@ err: } EXPORT_SYMBOL(rdma_listen); -int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, + struct sockaddr *addr, const struct sockaddr *daddr) { - struct rdma_id_private *id_priv; + struct sockaddr *id_daddr; int ret; - struct sockaddr *daddr; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; - id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; - ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); if (ret) goto err1; @@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; + id_daddr = cma_dst_addr(id_priv); + if (daddr != id_daddr) + memcpy(id_daddr, daddr, rdma_addr_size(addr)); + id_daddr->sa_family = addr->sa_family; ret = cma_get_port(id_priv); if (ret) @@ -4121,6 +4007,127 @@ err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } + return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); +} + +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. + */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + int ret; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) + return -EINVAL; + + } + + if (cma_family(id_priv) != dst_addr->sa_family) { + ret = -EINVAL; + goto err_state; + } + return 0; + +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; + + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, + false, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); +} EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) -- GitLab From 2d9b3e1ae1bed1f20621d5cc95e74746a4afbe7d Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 12 Jul 2023 18:41:33 -0500 Subject: [PATCH 0511/1418] RDMA/core: Update CMA destination address on rdma_resolve_addr commit 0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 upstream. 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") intoduces as regression on irdma devices on certain tests which uses rdma CM, such as cmtime. No connections can be established with the MAD QP experiences a fatal error on the active side. The cma destination address is not updated with the dst_addr when ULP on active side calls rdma_bind_addr followed by rdma_resolve_addr. The id_priv state is 'bound' in resolve_prepare_src and update is skipped. This leaves the dgid passed into irdma driver to create an Address Handle (AH) for the MAD QP at 0. The create AH descriptor as well as the ARP cache entry is invalid and HW throws an asynchronous events as result. [ 1207.656888] resolve_prepare_src caller: ucma_resolve_addr+0xff/0x170 [rdma_ucm] daddr=200.0.4.28 id_priv->state=7 [....] [ 1207.680362] ice 0000:07:00.1 rocep7s0f1: caller: irdma_create_ah+0x3e/0x70 [irdma] ah_id=0 arp_idx=0 dest_ip=0.0.0.0 destMAC=00:00:64:ca:b7:52 ipvalid=1 raw=0000:0000:0000:0000:0000:ffff:0000:0000 [ 1207.682077] ice 0000:07:00.1 rocep7s0f1: abnormal ae_id = 0x401 bool qp=1 qp_id = 1, ae_src=5 [ 1207.691657] infiniband rocep7s0f1: Fatal error (1) on MAD QP (1) Fix this by updating the CMA destination address when the ULP calls a resolve address with the CM state already bound. Fixes: 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230712234133.1343-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 950c8422aec29..067d7f42871ff 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4060,6 +4060,8 @@ static int resolve_prepare_src(struct rdma_id_private *id_priv, RDMA_CM_ADDR_QUERY))) return -EINVAL; + } else { + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); } if (cma_family(id_priv) != dst_addr->sa_family) { -- GitLab From e7945d93fece3ae43d2ed47d5ef4e254c8a3b712 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Aug 2022 11:00:16 +0200 Subject: [PATCH 0512/1418] efi: libstub: use EFI_LOADER_CODE region when moving the kernel in memory commit 9cf42bca30e98a1c6c9e8abf876940a551eaa3d1 upstream. The EFI spec is not very clear about which permissions are being given when allocating pages of a certain type. However, it is quite obvious that EFI_LOADER_CODE is more likely to permit execution than EFI_LOADER_DATA, which becomes relevant once we permit booting the kernel proper with the firmware's 1:1 mapping still active. Ostensibly, recent systems such as the Surface Pro X grant executable permissions to EFI_LOADER_CODE regions but not EFI_LOADER_DATA regions. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/alignedmem.c | 5 +++-- drivers/firmware/efi/libstub/arm64-stub.c | 6 ++++-- drivers/firmware/efi/libstub/efistub.h | 6 ++++-- drivers/firmware/efi/libstub/mem.c | 3 ++- drivers/firmware/efi/libstub/randomalloc.c | 5 +++-- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 1de9878ddd3a2..174832661251e 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -22,7 +22,8 @@ * Return: status code */ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align) + unsigned long max, unsigned long align, + int memory_type) { efi_physical_addr_t alloc_addr; efi_status_t status; @@ -36,7 +37,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, slack = align / EFI_PAGE_SIZE - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, - EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack, + memory_type, size / EFI_PAGE_SIZE + slack, &alloc_addr); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e2f90566b291a..08f46c072da56 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * locate the kernel at a randomized offset in physical memory. */ status = efi_random_alloc(*reserve_size, min_kimg_align, - reserve_addr, phys_seed); + reserve_addr, phys_seed, + EFI_LOADER_CODE); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { @@ -201,7 +202,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align); + ULONG_MAX, min_kimg_align, + EFI_LOADER_CODE); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 970e86e3aab05..ab505b07e626b 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -880,7 +880,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long random_seed); + unsigned long *addr, unsigned long random_seed, + int memory_type); efi_status_t efi_random_get_seed(void); @@ -907,7 +908,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, unsigned long max); efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align); + unsigned long max, unsigned long align, + int memory_type); efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 45841ef55a9f6..03d147f17185b 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -91,7 +91,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) return efi_allocate_pages_aligned(size, addr, max, - EFI_ALLOC_ALIGN); + EFI_ALLOC_ALIGN, + EFI_LOADER_DATA); alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 9fb5869896be7..ec44bb7e092fa 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -53,7 +53,8 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, - unsigned long random_seed) + unsigned long random_seed, + int memory_type) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -118,7 +119,7 @@ efi_status_t efi_random_alloc(unsigned long size, pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, pages, &target); + memory_type, pages, &target); if (status == EFI_SUCCESS) *addr = target; break; -- GitLab From bad6e66d0701d88a1b7018ca0334b551fb71d74a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:01 +0100 Subject: [PATCH 0513/1418] x86/boot/compressed: Rename efi_thunk_64.S to efi-mixed.S commit cb8bda8ad4438b4bcfcf89697fc84803fb210017 upstream. In preparation for moving the mixed mode specific code out of head_64.S, rename the existing file to clarify that it contains more than just the mixed mode thunk. While at it, clean up the Makefile rules that add it to the build. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-2-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 6 +++--- arch/x86/boot/compressed/{efi_thunk_64.S => efi_mixed.S} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename arch/x86/boot/compressed/{efi_thunk_64.S => efi_mixed.S} (100%) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 15b7b403a4bd0..27c82c78ac260 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -108,11 +108,11 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o -vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o -efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o +vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_mixed.S similarity index 100% rename from arch/x86/boot/compressed/efi_thunk_64.S rename to arch/x86/boot/compressed/efi_mixed.S -- GitLab From 3bad8dc0ae8db10540290c69bbb3a3f8e6e5aff4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:02 +0100 Subject: [PATCH 0514/1418] x86/boot/compressed: Move 32-bit entrypoint code into .text section commit e2ab9eab324cdf240de89741e4a1aa79919f0196 upstream. Move the code that stores the arguments passed to the EFI entrypoint into the .text section, so that it can be moved into a separate compilation unit in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-3-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 48 +++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b4bd6df29116f..61b1be41867ce 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -303,24 +303,41 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) + .text +/* + * This is the common EFI stub entry point for mixed mode. + * + * Arguments: %ecx image handle + * %edx EFI system table pointer + * %esi struct bootparams pointer (or NULL when not using + * the EFI handover protocol) + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI + * stub may still exit and return to the firmware using the Exit() EFI boot + * service.] + */ +SYM_FUNC_START_LOCAL(efi32_entry) call 1f -1: pop %ebp - subl $ rva(1b), %ebp - - movl %esi, rva(efi32_boot_args+8)(%ebp) -SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) - movl %ecx, rva(efi32_boot_args)(%ebp) - movl %edx, rva(efi32_boot_args+4)(%ebp) - movb $0, rva(efi_is64)(%ebp) +1: pop %ebx /* Save firmware GDTR and code/data selectors */ - sgdtl rva(efi32_boot_gdt)(%ebp) - movw %cs, rva(efi32_boot_cs)(%ebp) - movw %ds, rva(efi32_boot_ds)(%ebp) + sgdtl (efi32_boot_gdt - 1b)(%ebx) + movw %cs, (efi32_boot_cs - 1b)(%ebx) + movw %ds, (efi32_boot_ds - 1b)(%ebx) /* Store firmware IDT descriptor */ - sidtl rva(efi32_boot_idt)(%ebp) + sidtl (efi32_boot_idt - 1b)(%ebx) + + /* Store boot arguments */ + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) + movl %esi, 8(%ebx) + movb $0x0, 12(%ebx) // efi_is64 /* Disable paging */ movl %cr0, %eax @@ -328,7 +345,8 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) movl %eax, %cr0 jmp startup_32 -SYM_FUNC_END(efi32_stub_entry) +SYM_FUNC_END(efi32_entry) + __HEAD #endif .code64 @@ -847,7 +865,9 @@ SYM_FUNC_START(efi32_pe_entry) */ subl %esi, %ebx movl %ebx, rva(image_offset)(%ebp) // save image_offset - jmp efi32_pe_stub_entry + xorl %esi, %esi + jmp efi32_entry // pass %ecx, %edx, %esi + // no other registers remain live 2: popl %edi // restore callee-save registers popl %ebx -- GitLab From d8950e8e20e006c8cbc4cc1ff81c35921053a8a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:03 +0100 Subject: [PATCH 0515/1418] x86/boot/compressed: Move bootargs parsing out of 32-bit startup code commit 5c3a85f35b583259cf5ca0344cd79c8899ba1bb7 upstream. Move the logic that chooses between the different EFI entrypoints out of the 32-bit boot path, and into a 64-bit helper that can perform the same task much more cleanly. While at it, document the mixed mode boot flow in a code comment. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-4-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 43 ++++++++++++++++++++++++++++ arch/x86/boot/compressed/head_64.S | 24 +++------------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 67e7edcdfea8f..58ab2e1ffd92a 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -22,6 +22,49 @@ .code64 .text +/* + * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() + * is the first thing that runs after switching to long mode. Depending on + * whether the EFI handover protocol or the compat entry point was used to + * enter the kernel, it will either branch to the 64-bit EFI handover + * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF + * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a + * struct bootparams pointer as the third argument, so the presence of such a + * pointer is used to disambiguate. + * + * +--------------+ + * +------------------+ +------------+ +------>| efi_pe_entry | + * | efi32_pe_entry |---->| | | +-----------+--+ + * +------------------+ | | +------+----------------+ | + * | startup_32 |---->| startup_64_mixed_mode | | + * +------------------+ | | +------+----------------+ V + * | efi32_stub_entry |---->| | | +------------------+ + * +------------------+ +------------+ +---->| efi64_stub_entry | + * +-------------+----+ + * +------------+ +----------+ | + * | startup_64 |<----| efi_main |<--------------+ + * +------------+ +----------+ + */ +SYM_FUNC_START(startup_64_mixed_mode) + lea efi32_boot_args(%rip), %rdx + mov 0(%rdx), %edi + mov 4(%rdx), %esi + mov 8(%rdx), %edx // saved bootparams pointer + test %edx, %edx + jnz efi64_stub_entry + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + sub $40, %rsp + mov %rdi, %rcx // MS calling convention + mov %rsi, %rdx + jmp efi_pe_entry +SYM_FUNC_END(startup_64_mixed_mode) + SYM_FUNC_START(__efi64_thunk) push %rbp push %rbx diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 61b1be41867ce..71542630f00f5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -261,25 +261,9 @@ SYM_FUNC_START(startup_32) */ leal rva(startup_64)(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl rva(efi32_boot_args)(%ebp), %edi - testl %edi, %edi - jz 1f - leal rva(efi64_stub_entry)(%ebp), %eax - movl rva(efi32_boot_args+4)(%ebp), %esi - movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer - testl %edx, %edx - jnz 1f - /* - * efi_pe_entry uses MS calling convention, which requires 32 bytes of - * shadow space on the stack even if all arguments are passed in - * registers. We also need an additional 8 bytes for the space that - * would be occupied by the return address, and this also results in - * the correct stack alignment for entry. - */ - subl $40, %esp - leal rva(efi_pe_entry)(%ebp), %eax - movl %edi, %ecx // MS calling convention - movl %esi, %edx + cmpb $1, rva(efi_is64)(%ebp) + je 1f + leal rva(startup_64_mixed_mode)(%ebp), %eax 1: #endif /* Check if the C-bit position is correct when SEV is active */ @@ -795,7 +779,7 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) SYM_DATA(image_offset, .long 0) #endif #ifdef CONFIG_EFI_MIXED -SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) #define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -- GitLab From c577208f81c9ddbc5ab1418bfe810680a671fa84 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:04 +0100 Subject: [PATCH 0516/1418] x86/boot/compressed: Move efi32_pe_entry into .text section commit 91592b5c0c2f076ff9d8cc0c14aa563448ac9fc4 upstream. Move efi32_pe_entry() into the .text section, so that it can be moved out of head_64.S and into a separate compilation unit in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-5-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 71542630f00f5..14cd51d397195 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -786,7 +786,7 @@ SYM_DATA(efi_is64, .byte 1) #define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) #define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - __HEAD + .text .code32 SYM_FUNC_START(efi32_pe_entry) /* @@ -808,12 +808,11 @@ SYM_FUNC_START(efi32_pe_entry) call 1f 1: pop %ebx - subl $ rva(1b), %ebx /* Get the loaded image protocol pointer from the image handle */ leal -4(%ebp), %eax pushl %eax // &loaded_image - leal rva(loaded_image_proto)(%ebx), %eax + leal (loaded_image_proto - 1b)(%ebx), %eax pushl %eax // pass the GUID address pushl 8(%ebp) // pass the image handle @@ -842,13 +841,13 @@ SYM_FUNC_START(efi32_pe_entry) movl 12(%ebp), %edx // sys_table movl -4(%ebp), %esi // loaded_image movl LI32_image_base(%esi), %esi // loaded_image->image_base - movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry + leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 /* * We need to set the image_offset variable here since startup_32() will * use it before we get to the 64-bit efi_pe_entry() in C code. */ - subl %esi, %ebx - movl %ebx, rva(image_offset)(%ebp) // save image_offset + subl %esi, %ebp // calculate image_offset + movl %ebp, (image_offset - 1b)(%ebx) // save image_offset xorl %esi, %esi jmp efi32_entry // pass %ecx, %edx, %esi // no other registers remain live -- GitLab From 469b84516cc456fdf003dc99d9a9b0e7eab27c24 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:05 +0100 Subject: [PATCH 0517/1418] x86/boot/compressed: Move efi32_entry out of head_64.S commit 73a6dec80e2acedaef3ca603d4b5799049f6e9f8 upstream. Move the efi32_entry() routine out of head_64.S and into efi-mixed.S, which reduces clutter in the complicated startup routines. It also permits linkage of some symbols used by code to be made local. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-6-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 57 +++++++++++++++++++++++----- arch/x86/boot/compressed/head_64.S | 45 ---------------------- 2 files changed, 47 insertions(+), 55 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 58ab2e1ffd92a..3487484ac1fd5 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -105,7 +105,7 @@ SYM_FUNC_START(__efi64_thunk) /* * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT * and IDT that was installed when the kernel started executing. The - * pointers were saved at the EFI stub entry point in head_64.S. + * pointers were saved by the efi32_entry() routine below. * * Pass the saved DS selector to the 32-bit code, and use far return to * restore the saved CS selector. @@ -217,22 +217,59 @@ SYM_FUNC_START_LOCAL(efi_enter32) lret SYM_FUNC_END(efi_enter32) +/* + * This is the common EFI stub entry point for mixed mode. + * + * Arguments: %ecx image handle + * %edx EFI system table pointer + * %esi struct bootparams pointer (or NULL when not using + * the EFI handover protocol) + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI + * stub may still exit and return to the firmware using the Exit() EFI boot + * service.] + */ +SYM_FUNC_START(efi32_entry) + call 1f +1: pop %ebx + + /* Save firmware GDTR and code/data selectors */ + sgdtl (efi32_boot_gdt - 1b)(%ebx) + movw %cs, (efi32_boot_cs - 1b)(%ebx) + movw %ds, (efi32_boot_ds - 1b)(%ebx) + + /* Store firmware IDT descriptor */ + sidtl (efi32_boot_idt - 1b)(%ebx) + + /* Store boot arguments */ + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) + movl %esi, 8(%ebx) + movb $0x0, 12(%ebx) // efi_is64 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + jmp startup_32 +SYM_FUNC_END(efi32_entry) + .data .balign 8 -SYM_DATA_START(efi32_boot_gdt) +SYM_DATA_START_LOCAL(efi32_boot_gdt) .word 0 .quad 0 SYM_DATA_END(efi32_boot_gdt) -SYM_DATA_START(efi32_boot_idt) +SYM_DATA_START_LOCAL(efi32_boot_idt) .word 0 .quad 0 SYM_DATA_END(efi32_boot_idt) -SYM_DATA_START(efi32_boot_cs) - .word 0 -SYM_DATA_END(efi32_boot_cs) - -SYM_DATA_START(efi32_boot_ds) - .word 0 -SYM_DATA_END(efi32_boot_ds) +SYM_DATA_LOCAL(efi32_boot_cs, .word 0) +SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 14cd51d397195..2b812028fb2f0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -289,48 +289,6 @@ SYM_FUNC_START(efi32_stub_entry) popl %esi jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) - - .text -/* - * This is the common EFI stub entry point for mixed mode. - * - * Arguments: %ecx image handle - * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) - * - * Since this is the point of no return for ordinary execution, no registers - * are considered live except for the function parameters. [Note that the EFI - * stub may still exit and return to the firmware using the Exit() EFI boot - * service.] - */ -SYM_FUNC_START_LOCAL(efi32_entry) - call 1f -1: pop %ebx - - /* Save firmware GDTR and code/data selectors */ - sgdtl (efi32_boot_gdt - 1b)(%ebx) - movw %cs, (efi32_boot_cs - 1b)(%ebx) - movw %ds, (efi32_boot_ds - 1b)(%ebx) - - /* Store firmware IDT descriptor */ - sidtl (efi32_boot_idt - 1b)(%ebx) - - /* Store boot arguments */ - leal (efi32_boot_args - 1b)(%ebx), %ebx - movl %ecx, 0(%ebx) - movl %edx, 4(%ebx) - movl %esi, 8(%ebx) - movb $0x0, 12(%ebx) // efi_is64 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - jmp startup_32 -SYM_FUNC_END(efi32_entry) - __HEAD #endif .code64 @@ -779,9 +737,6 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) SYM_DATA(image_offset, .long 0) #endif #ifdef CONFIG_EFI_MIXED -SYM_DATA(efi32_boot_args, .long 0, 0, 0) -SYM_DATA(efi_is64, .byte 1) - #define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) #define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) #define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) -- GitLab From beeeb4655db99ed0eb70f6756518fd202fd12e1a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:06 +0100 Subject: [PATCH 0518/1418] x86/boot/compressed: Move efi32_pe_entry() out of head_64.S commit 7f22ca396778fea9332d83ec2359dbe8396e9a06 upstream. Move the implementation of efi32_pe_entry() into efi-mixed.S, which is a more suitable location that only gets built if EFI mixed mode is actually enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-7-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 82 ++++++++++++++++++++++++++ arch/x86/boot/compressed/head_64.S | 87 +--------------------------- 2 files changed, 83 insertions(+), 86 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 3487484ac1fd5..8844d8ed4b1c7 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -257,6 +257,88 @@ SYM_FUNC_START(efi32_entry) jmp startup_32 SYM_FUNC_END(efi32_entry) +#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) +#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) +#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) + +/* + * efi_status_t efi32_pe_entry(efi_handle_t image_handle, + * efi_system_table_32_t *sys_table) + */ +SYM_FUNC_START(efi32_pe_entry) + pushl %ebp + movl %esp, %ebp + pushl %eax // dummy push to allocate loaded_image + + pushl %ebx // save callee-save registers + pushl %edi + + call verify_cpu // check for long mode support + testl %eax, %eax + movl $0x80000003, %eax // EFI_UNSUPPORTED + jnz 2f + + call 1f +1: pop %ebx + + /* Get the loaded image protocol pointer from the image handle */ + leal -4(%ebp), %eax + pushl %eax // &loaded_image + leal (loaded_image_proto - 1b)(%ebx), %eax + pushl %eax // pass the GUID address + pushl 8(%ebp) // pass the image handle + + /* + * Note the alignment of the stack frame. + * sys_table + * handle <-- 16-byte aligned on entry by ABI + * return address + * frame pointer + * loaded_image <-- local variable + * saved %ebx <-- 16-byte aligned here + * saved %edi + * &loaded_image + * &loaded_image_proto + * handle <-- 16-byte aligned for call to handle_protocol + */ + + movl 12(%ebp), %eax // sys_table + movl ST32_boottime(%eax), %eax // sys_table->boottime + call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol + addl $12, %esp // restore argument space + testl %eax, %eax + jnz 2f + + movl 8(%ebp), %ecx // image_handle + movl 12(%ebp), %edx // sys_table + movl -4(%ebp), %esi // loaded_image + movl LI32_image_base(%esi), %esi // loaded_image->image_base + leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 + /* + * We need to set the image_offset variable here since startup_32() will + * use it before we get to the 64-bit efi_pe_entry() in C code. + */ + subl %esi, %ebp // calculate image_offset + movl %ebp, (image_offset - 1b)(%ebx) // save image_offset + xorl %esi, %esi + jmp efi32_entry // pass %ecx, %edx, %esi + // no other registers remain live + +2: popl %edi // restore callee-save registers + popl %ebx + leave + RET +SYM_FUNC_END(efi32_pe_entry) + + .section ".rodata" + /* EFI loaded image protocol GUID */ + .balign 4 +SYM_DATA_START_LOCAL(loaded_image_proto) + .long 0x5b1b31a1 + .word 0x9562, 0x11d2 + .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b +SYM_DATA_END(loaded_image_proto) + .data .balign 8 SYM_DATA_START_LOCAL(efi32_boot_gdt) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 2b812028fb2f0..d04d981c2b9b6 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -689,6 +689,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) jmp 1b SYM_FUNC_END(.Lno_longmode) + .globl verify_cpu #include "../../kernel/verify_cpu.S" .data @@ -736,92 +737,6 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) #ifdef CONFIG_EFI_STUB SYM_DATA(image_offset, .long 0) #endif -#ifdef CONFIG_EFI_MIXED -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - - .text - .code32 -SYM_FUNC_START(efi32_pe_entry) -/* - * efi_status_t efi32_pe_entry(efi_handle_t image_handle, - * efi_system_table_32_t *sys_table) - */ - - pushl %ebp - movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - - pushl %ebx // save callee-save registers - pushl %edi - - call verify_cpu // check for long mode support - testl %eax, %eax - movl $0x80000003, %eax // EFI_UNSUPPORTED - jnz 2f - - call 1f -1: pop %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal (loaded_image_proto - 1b)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - - movl 8(%ebp), %ecx // image_handle - movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebp // calculate image_offset - movl %ebp, (image_offset - 1b)(%ebx) // save image_offset - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi - // no other registers remain live - -2: popl %edi // restore callee-save registers - popl %ebx - leave - RET -SYM_FUNC_END(efi32_pe_entry) - - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) -#endif #ifdef CONFIG_AMD_MEM_ENCRYPT __HEAD -- GitLab From ef12d049fa7b429a0f1842307e921da30ff2e97b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:07 +0100 Subject: [PATCH 0519/1418] x86/boot/compressed, efi: Merge multiple definitions of image_offset into one commit 4b52016247aeaa55ca3e3bc2e03cd91114c145c2 upstream. There is no need for head_32.S and head_64.S both declaring a copy of the global 'image_offset' variable, so drop those and make the extern C declaration the definition. When image_offset is moved to the .c file, it needs to be placed particularly in the .data section because it lands by default in the .bss section which is cleared too late, in .Lrelocated, before the first access to it and thus garbage gets read, leading to SEV guests exploding in early boot. This happens only when the SEV guest kernel is loaded through grub. If supplied with qemu's -kernel command line option, that memory is always cleared upfront by qemu and all is fine there. [ bp: Expand commit message with SEV aspect. ] Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-8-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_32.S | 4 ---- arch/x86/boot/compressed/head_64.S | 4 ---- drivers/firmware/efi/libstub/x86-stub.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3b354eb9516df..6589ddd4cfaf2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -208,10 +208,6 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x00cf92000000ffff /* __KERNEL_DS */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d04d981c2b9b6..2235c3a13f547 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -734,10 +734,6 @@ SYM_DATA_START(boot32_idt) SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) #endif -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif - #ifdef CONFIG_AMD_MEM_ENCRYPT __HEAD .code32 diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 4f0152b11a890..9ae0d6d0c285f 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -23,7 +23,7 @@ const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -extern u32 image_offset; +u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; static efi_status_t -- GitLab From 88035744b91a187bcc23253a73ef3b1ccc08a2f9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:08 +0100 Subject: [PATCH 0520/1418] x86/boot/compressed: Simplify IDT/GDT preserve/restore in the EFI thunk commit 630f337f0c4fd80390e8600adcab31550aea33df upstream. Tweak the asm and remove some redundant instructions. While at it, fix the associated comment for style and correctness. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-9-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8844d8ed4b1c7..8b02e507d3bb0 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -96,24 +96,20 @@ SYM_FUNC_START(__efi64_thunk) leaq 0x20(%rsp), %rbx sgdt (%rbx) - - addq $16, %rbx - sidt (%rbx) + sidt 16(%rbx) leaq 1f(%rip), %rbp /* - * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT - * and IDT that was installed when the kernel started executing. The - * pointers were saved by the efi32_entry() routine below. + * Switch to IDT and GDT with 32-bit segments. These are the firmware + * GDT and IDT that were installed when the kernel started executing. + * The pointers were saved by the efi32_entry() routine below. * * Pass the saved DS selector to the 32-bit code, and use far return to * restore the saved CS selector. */ - leaq efi32_boot_idt(%rip), %rax - lidt (%rax) - leaq efi32_boot_gdt(%rip), %rax - lgdt (%rax) + lidt efi32_boot_idt(%rip) + lgdt efi32_boot_gdt(%rip) movzwl efi32_boot_ds(%rip), %edx movzwq efi32_boot_cs(%rip), %rax @@ -187,9 +183,7 @@ SYM_FUNC_START_LOCAL(efi_enter32) */ cli - lidtl (%ebx) - subl $16, %ebx - + lidtl 16(%ebx) lgdtl (%ebx) movl %cr4, %eax -- GitLab From 530a4271b7ba5776b7f5a67015ae63a3ba3d2348 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:09 +0100 Subject: [PATCH 0521/1418] x86/boot/compressed: Avoid touching ECX in startup32_set_idt_entry() commit 6aac80a8da46d70f2ae7ff97c9f45a15c7c9b3ef upstream. Avoid touching register %ecx in startup32_set_idt_entry(), by folding the MOV, SHL and ORL instructions into a single ORL which no longer requires a temp register. This permits ECX to be used as a function argument in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-10-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 2235c3a13f547..e90cbbb2903d9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -749,7 +749,6 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) */ SYM_FUNC_START(startup32_set_idt_entry) push %ebx - push %ecx /* IDT entry address to %ebx */ leal rva(boot32_idt)(%ebp), %ebx @@ -758,10 +757,8 @@ SYM_FUNC_START(startup32_set_idt_entry) /* Build IDT entry, lower 4 bytes */ movl %eax, %edx - andl $0x0000ffff, %edx # Target code segment offset [15:0] - movl $__KERNEL32_CS, %ecx # Target code segment selector - shl $16, %ecx - orl %ecx, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + orl $(__KERNEL32_CS << 16), %edx # Target code segment selector /* Store lower 4 bytes to IDT */ movl %edx, (%ebx) @@ -774,7 +771,6 @@ SYM_FUNC_START(startup32_set_idt_entry) /* Store upper 4 bytes to IDT */ movl %edx, 4(%ebx) - pop %ecx pop %ebx RET SYM_FUNC_END(startup32_set_idt_entry) -- GitLab From 29134968f72da9337dff949bba0bdb0c5134ba0a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:10 +0100 Subject: [PATCH 0522/1418] x86/boot/compressed: Pull global variable reference into startup32_load_idt() commit d73a257f7f86871c3aac24dc20538e3983096647 upstream. In preparation for moving startup32_load_idt() out of head_64.S and turning it into an ordinary function using the ordinary 32-bit calling convention, pull the global variable reference to boot32_idt up into startup32_load_idt() so that startup32_set_idt_entry() does not need to discover its own runtime physical address, which will no longer be correlated with startup_32 once this code is moved into .text. While at it, give startup32_set_idt_entry() static linkage. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-11-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e90cbbb2903d9..9d1e1ccab2566 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -744,16 +744,11 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) * * %eax: Handler address * %edx: Vector number - * - * Physical offset is expected in %ebp + * %ecx: IDT address */ -SYM_FUNC_START(startup32_set_idt_entry) - push %ebx - - /* IDT entry address to %ebx */ - leal rva(boot32_idt)(%ebp), %ebx - shl $3, %edx - addl %edx, %ebx +SYM_FUNC_START_LOCAL(startup32_set_idt_entry) + /* IDT entry address to %ecx */ + leal (%ecx, %edx, 8), %ecx /* Build IDT entry, lower 4 bytes */ movl %eax, %edx @@ -761,7 +756,7 @@ SYM_FUNC_START(startup32_set_idt_entry) orl $(__KERNEL32_CS << 16), %edx # Target code segment selector /* Store lower 4 bytes to IDT */ - movl %edx, (%ebx) + movl %edx, (%ecx) /* Build IDT entry, upper 4 bytes */ movl %eax, %edx @@ -769,15 +764,16 @@ SYM_FUNC_START(startup32_set_idt_entry) orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate /* Store upper 4 bytes to IDT */ - movl %edx, 4(%ebx) + movl %edx, 4(%ecx) - pop %ebx RET SYM_FUNC_END(startup32_set_idt_entry) #endif SYM_FUNC_START(startup32_load_idt) #ifdef CONFIG_AMD_MEM_ENCRYPT + leal rva(boot32_idt)(%ebp), %ecx + /* #VC handler */ leal rva(startup32_vc_handler)(%ebp), %eax movl $X86_TRAP_VC, %edx -- GitLab From 2e47116315a08bd5fa451bbeb66cb14ffc3f0de1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:11 +0100 Subject: [PATCH 0523/1418] x86/boot/compressed: Move startup32_load_idt() into .text section commit c6355995ba471d7ad574174e593192ce805c7e1a upstream. Convert startup32_load_idt() into an ordinary function and move it into the .text section. This involves turning the rva() immediates into ones derived from a local label, and preserving/restoring the %ebp and %ebx as per the calling convention. Also move the #ifdef to the only existing call site. This makes it clear that the function call does nothing if support for memory encryption is not compiled in. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-12-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 9d1e1ccab2566..53f3f01f88af2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32) 1: /* Setup Exception handling for SEV-ES */ +#ifdef CONFIG_AMD_MEM_ENCRYPT call startup32_load_idt +#endif /* Make sure cpu supports long mode. */ call verify_cpu @@ -732,10 +734,8 @@ SYM_DATA_START(boot32_idt) .quad 0 .endr SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) -#endif -#ifdef CONFIG_AMD_MEM_ENCRYPT - __HEAD + .text .code32 /* * Write an IDT entry into boot32_idt @@ -768,24 +768,32 @@ SYM_FUNC_START_LOCAL(startup32_set_idt_entry) RET SYM_FUNC_END(startup32_set_idt_entry) -#endif SYM_FUNC_START(startup32_load_idt) -#ifdef CONFIG_AMD_MEM_ENCRYPT - leal rva(boot32_idt)(%ebp), %ecx + push %ebp + push %ebx + + call 1f +1: pop %ebp + + leal (boot32_idt - 1b)(%ebp), %ebx /* #VC handler */ - leal rva(startup32_vc_handler)(%ebp), %eax + leal (startup32_vc_handler - 1b)(%ebp), %eax movl $X86_TRAP_VC, %edx + movl %ebx, %ecx call startup32_set_idt_entry /* Load IDT */ - leal rva(boot32_idt)(%ebp), %eax - movl %eax, rva(boot32_idt_desc+2)(%ebp) - lidt rva(boot32_idt_desc)(%ebp) -#endif + leal (boot32_idt_desc - 1b)(%ebp), %ecx + movl %ebx, 2(%ecx) + lidt (%ecx) + + pop %ebx + pop %ebp RET SYM_FUNC_END(startup32_load_idt) +#endif /* * Check for the correct C-bit position when the startup_32 boot-path is used. @@ -804,6 +812,7 @@ SYM_FUNC_END(startup32_load_idt) * succeed. An incorrect C-bit position will map all memory unencrypted, so that * the compare will use the encrypted random data and fail. */ + __HEAD SYM_FUNC_START(startup32_check_sev_cbit) #ifdef CONFIG_AMD_MEM_ENCRYPT pushl %eax -- GitLab From 801873f1750aa1cc42e290d8a818e340fd7d0987 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:12 +0100 Subject: [PATCH 0524/1418] x86/boot/compressed: Move startup32_load_idt() out of head_64.S commit 9ea813be3d345dfb8ac5bf6fbb29e6a63647a39d upstream. Now that startup32_load_idt() has been refactored into an ordinary callable function, move it into mem-encrypt.S where it belongs. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-13-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 72 -------------------------- arch/x86/boot/compressed/mem_encrypt.S | 72 +++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 73 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 53f3f01f88af2..b1d00f862af91 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -723,78 +723,6 @@ SYM_DATA_START(boot_idt) .endr SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) -#ifdef CONFIG_AMD_MEM_ENCRYPT -SYM_DATA_START(boot32_idt_desc) - .word boot32_idt_end - boot32_idt - 1 - .long 0 -SYM_DATA_END(boot32_idt_desc) - .balign 8 -SYM_DATA_START(boot32_idt) - .rept 32 - .quad 0 - .endr -SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) - - .text - .code32 -/* - * Write an IDT entry into boot32_idt - * - * Parameters: - * - * %eax: Handler address - * %edx: Vector number - * %ecx: IDT address - */ -SYM_FUNC_START_LOCAL(startup32_set_idt_entry) - /* IDT entry address to %ecx */ - leal (%ecx, %edx, 8), %ecx - - /* Build IDT entry, lower 4 bytes */ - movl %eax, %edx - andl $0x0000ffff, %edx # Target code segment offset [15:0] - orl $(__KERNEL32_CS << 16), %edx # Target code segment selector - - /* Store lower 4 bytes to IDT */ - movl %edx, (%ecx) - - /* Build IDT entry, upper 4 bytes */ - movl %eax, %edx - andl $0xffff0000, %edx # Target code segment offset [31:16] - orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate - - /* Store upper 4 bytes to IDT */ - movl %edx, 4(%ecx) - - RET -SYM_FUNC_END(startup32_set_idt_entry) - -SYM_FUNC_START(startup32_load_idt) - push %ebp - push %ebx - - call 1f -1: pop %ebp - - leal (boot32_idt - 1b)(%ebp), %ebx - - /* #VC handler */ - leal (startup32_vc_handler - 1b)(%ebp), %eax - movl $X86_TRAP_VC, %edx - movl %ebx, %ecx - call startup32_set_idt_entry - - /* Load IDT */ - leal (boot32_idt_desc - 1b)(%ebp), %ecx - movl %ebx, 2(%ecx) - lidt (%ecx) - - pop %ebx - pop %ebp - RET -SYM_FUNC_END(startup32_load_idt) -#endif - /* * Check for the correct C-bit position when the startup_32 boot-path is used. * diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a73e4d783cae2..6747e5e4c6966 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -12,6 +12,8 @@ #include #include #include +#include +#include .text .code32 @@ -98,7 +100,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid) jmp 1b SYM_CODE_END(sev_es_req_cpuid) -SYM_CODE_START(startup32_vc_handler) +SYM_CODE_START_LOCAL(startup32_vc_handler) pushl %eax pushl %ebx pushl %ecx @@ -184,6 +186,63 @@ SYM_CODE_START(startup32_vc_handler) jmp .Lfail SYM_CODE_END(startup32_vc_handler) +/* + * Write an IDT entry into boot32_idt + * + * Parameters: + * + * %eax: Handler address + * %edx: Vector number + * %ecx: IDT address + */ +SYM_FUNC_START_LOCAL(startup32_set_idt_entry) + /* IDT entry address to %ecx */ + leal (%ecx, %edx, 8), %ecx + + /* Build IDT entry, lower 4 bytes */ + movl %eax, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + orl $(__KERNEL32_CS << 16), %edx # Target code segment selector + + /* Store lower 4 bytes to IDT */ + movl %edx, (%ecx) + + /* Build IDT entry, upper 4 bytes */ + movl %eax, %edx + andl $0xffff0000, %edx # Target code segment offset [31:16] + orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate + + /* Store upper 4 bytes to IDT */ + movl %edx, 4(%ecx) + + RET +SYM_FUNC_END(startup32_set_idt_entry) + +SYM_FUNC_START(startup32_load_idt) + push %ebp + push %ebx + + call 1f +1: pop %ebp + + leal (boot32_idt - 1b)(%ebp), %ebx + + /* #VC handler */ + leal (startup32_vc_handler - 1b)(%ebp), %eax + movl $X86_TRAP_VC, %edx + movl %ebx, %ecx + call startup32_set_idt_entry + + /* Load IDT */ + leal (boot32_idt_desc - 1b)(%ebp), %ecx + movl %ebx, 2(%ecx) + lidt (%ecx) + + pop %ebx + pop %ebp + RET +SYM_FUNC_END(startup32_load_idt) + .code64 #include "../../kernel/sev_verify_cbit.S" @@ -195,4 +254,15 @@ SYM_CODE_END(startup32_vc_handler) SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) SYM_DATA(sev_check_data, .quad 0) + +SYM_DATA_START_LOCAL(boot32_idt) + .rept 32 + .quad 0 + .endr +SYM_DATA_END(boot32_idt) + +SYM_DATA_START_LOCAL(boot32_idt_desc) + .word . - boot32_idt - 1 + .long 0 +SYM_DATA_END(boot32_idt_desc) #endif -- GitLab From e840ae3dc277f7f4ae38f600e7f5da7f169b8d7c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:13 +0100 Subject: [PATCH 0525/1418] x86/boot/compressed: Move startup32_check_sev_cbit() into .text commit b5d854cd4b6a314edd6c15dabc4233b84a0f8e5e upstream. Move startup32_check_sev_cbit() into the .text section and turn it into an ordinary function using the ordinary 32-bit calling convention, instead of saving/restoring the registers that are known to be live at the only call site. This improves maintainability, and makes it possible to move this function out of head_64.S and into a separate compilation unit that is specific to memory encryption. Note that this requires the call site to be moved before the mixed mode check, as %eax will be live otherwise. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-14-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 35 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b1d00f862af91..c7655a9dfd3f8 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -251,6 +251,11 @@ SYM_FUNC_START(startup_32) movl $__BOOT_TSS, %eax ltr %ax +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* Check if the C-bit position is correct when SEV is active */ + call startup32_check_sev_cbit +#endif + /* * Setup for the jump to 64bit mode * @@ -268,8 +273,6 @@ SYM_FUNC_START(startup_32) leal rva(startup_64_mixed_mode)(%ebp), %eax 1: #endif - /* Check if the C-bit position is correct when SEV is active */ - call startup32_check_sev_cbit pushl $__KERNEL_CS pushl %eax @@ -740,16 +743,17 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) * succeed. An incorrect C-bit position will map all memory unencrypted, so that * the compare will use the encrypted random data and fail. */ - __HEAD -SYM_FUNC_START(startup32_check_sev_cbit) #ifdef CONFIG_AMD_MEM_ENCRYPT - pushl %eax + .text +SYM_FUNC_START(startup32_check_sev_cbit) pushl %ebx - pushl %ecx - pushl %edx + pushl %ebp + + call 0f +0: popl %ebp /* Check for non-zero sev_status */ - movl rva(sev_status)(%ebp), %eax + movl (sev_status - 0b)(%ebp), %eax testl %eax, %eax jz 4f @@ -764,17 +768,18 @@ SYM_FUNC_START(startup32_check_sev_cbit) jnc 2b /* Store to memory and keep it in the registers */ - movl %eax, rva(sev_check_data)(%ebp) - movl %ebx, rva(sev_check_data+4)(%ebp) + leal (sev_check_data - 0b)(%ebp), %ebp + movl %eax, 0(%ebp) + movl %ebx, 4(%ebp) /* Enable paging to see if encryption is active */ movl %cr0, %edx /* Backup %cr0 in %edx */ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ movl %ecx, %cr0 - cmpl %eax, rva(sev_check_data)(%ebp) + cmpl %eax, 0(%ebp) jne 3f - cmpl %ebx, rva(sev_check_data+4)(%ebp) + cmpl %ebx, 4(%ebp) jne 3f movl %edx, %cr0 /* Restore previous %cr0 */ @@ -786,13 +791,11 @@ SYM_FUNC_START(startup32_check_sev_cbit) jmp 3b 4: - popl %edx - popl %ecx + popl %ebp popl %ebx - popl %eax -#endif RET SYM_FUNC_END(startup32_check_sev_cbit) +#endif /* * Stack and heap for uncompression -- GitLab From 0912dce9ed4e8a6442fb39627cd37ca5a25beec5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:14 +0100 Subject: [PATCH 0526/1418] x86/boot/compressed: Move startup32_check_sev_cbit() out of head_64.S commit 9d7eaae6a071ff1f718e0aa5e610bb712f8cc632 upstream. Now that the startup32_check_sev_cbit() routine can execute from anywhere and behaves like an ordinary function, it can be moved where it belongs. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-15-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 71 -------------------------- arch/x86/boot/compressed/mem_encrypt.S | 68 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 71 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c7655a9dfd3f8..43a82df0e9d63 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -726,77 +726,6 @@ SYM_DATA_START(boot_idt) .endr SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) -/* - * Check for the correct C-bit position when the startup_32 boot-path is used. - * - * The check makes use of the fact that all memory is encrypted when paging is - * disabled. The function creates 64 bits of random data using the RDRAND - * instruction. RDRAND is mandatory for SEV guests, so always available. If the - * hypervisor violates that the kernel will crash right here. - * - * The 64 bits of random data are stored to a memory location and at the same - * time kept in the %eax and %ebx registers. Since encryption is always active - * when paging is off the random data will be stored encrypted in main memory. - * - * Then paging is enabled. When the C-bit position is correct all memory is - * still mapped encrypted and comparing the register values with memory will - * succeed. An incorrect C-bit position will map all memory unencrypted, so that - * the compare will use the encrypted random data and fail. - */ -#ifdef CONFIG_AMD_MEM_ENCRYPT - .text -SYM_FUNC_START(startup32_check_sev_cbit) - pushl %ebx - pushl %ebp - - call 0f -0: popl %ebp - - /* Check for non-zero sev_status */ - movl (sev_status - 0b)(%ebp), %eax - testl %eax, %eax - jz 4f - - /* - * Get two 32-bit random values - Don't bail out if RDRAND fails - * because it is better to prevent forward progress if no random value - * can be gathered. - */ -1: rdrand %eax - jnc 1b -2: rdrand %ebx - jnc 2b - - /* Store to memory and keep it in the registers */ - leal (sev_check_data - 0b)(%ebp), %ebp - movl %eax, 0(%ebp) - movl %ebx, 4(%ebp) - - /* Enable paging to see if encryption is active */ - movl %cr0, %edx /* Backup %cr0 in %edx */ - movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ - movl %ecx, %cr0 - - cmpl %eax, 0(%ebp) - jne 3f - cmpl %ebx, 4(%ebp) - jne 3f - - movl %edx, %cr0 /* Restore previous %cr0 */ - - jmp 4f - -3: /* Check failed - hlt the machine */ - hlt - jmp 3b - -4: - popl %ebp - popl %ebx - RET -SYM_FUNC_END(startup32_check_sev_cbit) -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 6747e5e4c6966..14cf04a1ed091 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -243,6 +243,74 @@ SYM_FUNC_START(startup32_load_idt) RET SYM_FUNC_END(startup32_load_idt) +/* + * Check for the correct C-bit position when the startup_32 boot-path is used. + * + * The check makes use of the fact that all memory is encrypted when paging is + * disabled. The function creates 64 bits of random data using the RDRAND + * instruction. RDRAND is mandatory for SEV guests, so always available. If the + * hypervisor violates that the kernel will crash right here. + * + * The 64 bits of random data are stored to a memory location and at the same + * time kept in the %eax and %ebx registers. Since encryption is always active + * when paging is off the random data will be stored encrypted in main memory. + * + * Then paging is enabled. When the C-bit position is correct all memory is + * still mapped encrypted and comparing the register values with memory will + * succeed. An incorrect C-bit position will map all memory unencrypted, so that + * the compare will use the encrypted random data and fail. + */ +SYM_FUNC_START(startup32_check_sev_cbit) + pushl %ebx + pushl %ebp + + call 0f +0: popl %ebp + + /* Check for non-zero sev_status */ + movl (sev_status - 0b)(%ebp), %eax + testl %eax, %eax + jz 4f + + /* + * Get two 32-bit random values - Don't bail out if RDRAND fails + * because it is better to prevent forward progress if no random value + * can be gathered. + */ +1: rdrand %eax + jnc 1b +2: rdrand %ebx + jnc 2b + + /* Store to memory and keep it in the registers */ + leal (sev_check_data - 0b)(%ebp), %ebp + movl %eax, 0(%ebp) + movl %ebx, 4(%ebp) + + /* Enable paging to see if encryption is active */ + movl %cr0, %edx /* Backup %cr0 in %edx */ + movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ + movl %ecx, %cr0 + + cmpl %eax, 0(%ebp) + jne 3f + cmpl %ebx, 4(%ebp) + jne 3f + + movl %edx, %cr0 /* Restore previous %cr0 */ + + jmp 4f + +3: /* Check failed - hlt the machine */ + hlt + jmp 3b + +4: + popl %ebp + popl %ebx + RET +SYM_FUNC_END(startup32_check_sev_cbit) + .code64 #include "../../kernel/sev_verify_cbit.S" -- GitLab From cac22c9a5e661a000e734af797641375fa181dbc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:15 +0100 Subject: [PATCH 0527/1418] x86/boot/compressed: Adhere to calling convention in get_sev_encryption_bit() commit 30c9ca16a5271ba6f8ad9c86507ff1c789c94677 upstream. Make get_sev_encryption_bit() follow the ordinary i386 calling convention, and only call it if CONFIG_AMD_MEM_ENCRYPT is actually enabled. This clarifies the calling code, and makes it more maintainable. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-16-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 5 +++-- arch/x86/boot/compressed/mem_encrypt.S | 10 ---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 43a82df0e9d63..cd4eb22aa84b1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -180,12 +180,13 @@ SYM_FUNC_START(startup_32) */ /* * If SEV is active then set the encryption mask in the page tables. - * This will insure that when the kernel is copied and decompressed + * This will ensure that when the kernel is copied and decompressed * it will be done so encrypted. */ - call get_sev_encryption_bit xorl %edx, %edx #ifdef CONFIG_AMD_MEM_ENCRYPT + call get_sev_encryption_bit + xorl %edx, %edx testl %eax, %eax jz 1f subl $32, %eax /* Encryption bit is always above bit 31 */ diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 14cf04a1ed091..e69674588a31c 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -18,12 +18,7 @@ .text .code32 SYM_FUNC_START(get_sev_encryption_bit) - xor %eax, %eax - -#ifdef CONFIG_AMD_MEM_ENCRYPT push %ebx - push %ecx - push %edx movl $0x80000000, %eax /* CPUID to check the highest leaf */ cpuid @@ -54,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit) xor %eax, %eax .Lsev_exit: - pop %edx - pop %ecx pop %ebx - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - RET SYM_FUNC_END(get_sev_encryption_bit) -- GitLab From 71c43b714fd688ff5ee6d906e5cc38e6a8f2836f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:16 +0100 Subject: [PATCH 0528/1418] x86/boot/compressed: Only build mem_encrypt.S if AMD_MEM_ENCRYPT=y commit 61de13df95901bc58456bc5acdbd3c18c66cf859 upstream. Avoid building the mem_encrypt.o object if memory encryption support is not enabled to begin with. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-17-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/mem_encrypt.S | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 27c82c78ac260..0c9ebf74fac59 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -100,7 +100,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/ident_map_64.o vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o - vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index e69674588a31c..32f7cc8a86254 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -307,7 +307,6 @@ SYM_FUNC_END(startup32_check_sev_cbit) .data -#ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) @@ -323,4 +322,3 @@ SYM_DATA_START_LOCAL(boot32_idt_desc) .word . - boot32_idt - 1 .long 0 SYM_DATA_END(boot32_idt_desc) -#endif -- GitLab From a8901f331b8b7f95a7315d033a22bc84c8365f35 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Jan 2023 17:42:54 +0100 Subject: [PATCH 0529/1418] efi: verify that variable services are supported commit bad267f9e18f8e9e628abd1811d2899b1735a4e1 upstream. Current Qualcomm UEFI firmware does not implement the variable services but not all revisions clear the corresponding bits in the RT_PROP table services mask and instead the corresponding calls return EFI_UNSUPPORTED. This leads to efi core registering the generic efivar ops even when the variable services are not supported or when they are accessed through some other interface (e.g. Google SMI or the upcoming Qualcomm SCM implementation). Instead of playing games with init call levels to make sure that the custom implementations are registered after the generic one, make sure that get_next_variable() is actually supported before registering the generic ops. Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index b7c0e8cc0764f..9077353d1c98d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -185,8 +185,27 @@ static const struct attribute_group efi_subsys_attr_group = { static struct efivars generic_efivars; static struct efivar_operations generic_ops; +static bool generic_ops_supported(void) +{ + unsigned long name_size; + efi_status_t status; + efi_char16_t name; + efi_guid_t guid; + + name_size = sizeof(name); + + status = efi.get_next_variable(&name_size, &name, &guid); + if (status == EFI_UNSUPPORTED) + return false; + + return true; +} + static int generic_ops_register(void) { + if (!generic_ops_supported()) + return 0; + generic_ops.get_variable = efi.get_variable; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; @@ -200,6 +219,9 @@ static int generic_ops_register(void) static void generic_ops_unregister(void) { + if (!generic_ops.get_variable) + return; + efivars_unregister(&generic_efivars); } -- GitLab From 7bc9533e077e2553264b447189d13f83c47770a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:17 +0100 Subject: [PATCH 0530/1418] x86/efi: Make the deprecated EFI handover protocol optional commit cc3fdda2876e58a7e83e558ab51853cf106afb6a upstream. The EFI handover protocol permits a bootloader to invoke the kernel as a EFI PE/COFF application, while passing a bootparams struct as a third argument to the entrypoint function call. This has no basis in the UEFI specification, and there are better ways to pass additional data to a UEFI application (UEFI configuration tables, UEFI variables, UEFI protocols) than going around the StartImage() boot service and jumping to a fixed offset in the loaded image, just to call a different function that takes a third parameter. The reason for handling struct bootparams in the bootloader was that the EFI stub could only load initrd images from the EFI system partition, and so passing it via struct bootparams was needed for loaders like GRUB, which pass the initrd in memory, and may load it from anywhere, including from the network. Another motivation was EFI mixed mode, which could not use the initrd loader in the EFI stub at all due to 32/64 bit incompatibilities (which will be fixed shortly [0]), and could not invoke the ordinary PE/COFF entry point either, for the same reasons. Given that loaders such as GRUB already carried the bootparams handling in order to implement non-EFI boot, retaining that code and just passing bootparams to the EFI stub was a reasonable choice (although defining an alternate entrypoint could have been avoided.) However, the GRUB side changes never made it upstream, and are only shipped by some of the distros in their downstream versions. In the meantime, EFI support has been added to other Linux architecture ports, as well as to U-boot and systemd, including arch-agnostic methods for passing initrd images in memory [1], and for doing mixed mode boot [2], none of them requiring anything like the EFI handover protocol. So given that only out-of-tree distro GRUB relies on this, let's permit it to be omitted from the build, in preparation for retiring it completely at a later date. (Note that systemd-boot does have an implementation as well, but only uses it as a fallback for booting images that do not implement the LoadFile2 based initrd loading method, i.e., v5.8 or older) [0] https://lore.kernel.org/all/20220927085842.2860715-1-ardb@kernel.org/ [1] ec93fc371f01 ("efi/libstub: Add support for loading the initrd from a device path") [2] 97aa276579b2 ("efi/x86: Add true mixed mode entry point into .compat section") Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-18-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 17 +++++++++++++++++ arch/x86/boot/compressed/head_64.S | 4 +++- arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 2 ++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4c9bfc4be58d4..2f7af61b49b6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1982,6 +1982,23 @@ config EFI_STUB See Documentation/admin-guide/efi-stub.rst for more information. +config EFI_HANDOVER_PROTOCOL + bool "EFI handover protocol (DEPRECATED)" + depends on EFI_STUB + default y + help + Select this in order to include support for the deprecated EFI + handover protocol, which defines alternative entry points into the + EFI stub. This is a practice that has no basis in the UEFI + specification, and requires a priori knowledge on the part of the + bootloader about Linux/x86 specific ways of passing the command line + and initrd, and where in memory those assets may be loaded. + + If in doubt, say Y. Even though the corresponding support is not + present in upstream GRUB or other bootloaders, most distros build + GRUB with numerous downstream patches applied, and may rely on the + handover protocol as as result. + config EFI_MIXED bool "EFI mixed-mode support" depends on EFI_STUB && X86_64 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index cd4eb22aa84b1..96c61c9883c36 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -286,7 +286,7 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_MIXED +#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) .org 0x190 SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp /* Discard return address */ @@ -535,7 +535,9 @@ trampoline_return: SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL .org 0x390 +#endif SYM_FUNC_START(efi64_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f912d77701305..d31982509654d 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -406,7 +406,7 @@ xloadflags: # define XLF1 0 #endif -#ifdef CONFIG_EFI_STUB +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL # ifdef CONFIG_EFI_MIXED # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) # else diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a3725ad46c5a0..bd247692b7017 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -290,6 +290,7 @@ static void efi_stub_entry_update(void) { unsigned long addr = efi32_stub_entry; +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ addr = efi64_stub_entry - 0x200; @@ -298,6 +299,7 @@ static void efi_stub_entry_update(void) #ifdef CONFIG_EFI_MIXED if (efi32_stub_entry != addr) die("32-bit and 64-bit EFI entry points do not match\n"); +#endif #endif put_unaligned_le32(addr, &buf[0x264]); } -- GitLab From 4f3077c3eae7e68e2c0ba6d1bd3f5afeb61eb269 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 9 Jan 2023 18:04:02 +0100 Subject: [PATCH 0531/1418] x86/boot: Robustify calling startup_{32,64}() from the decompressor code commit 7734a0f31e99c433df3063bbb7e8ee5a16a2cb82 upstream. After commit ce697ccee1a8 ("kbuild: remove head-y syntax"), I started digging whether x86 is ready for removing this old cruft. Removing its objects from the list makes the kernel unbootable. This applies only to bzImage, vmlinux still works correctly. The reason is that with no strict object order determined by the linker arguments, not the linker script, startup_64 can be placed not right at the beginning of the kernel. Here's vmlinux.map's beginning before removing: ffffffff81000000 vmlinux.o:(.head.text) ffffffff81000000 startup_64 ffffffff81000070 secondary_startup_64 ffffffff81000075 secondary_startup_64_no_verify ffffffff81000160 verify_cpu and after: ffffffff81000000 vmlinux.o:(.head.text) ffffffff81000000 pvh_start_xen ffffffff81000080 startup_64 ffffffff810000f0 secondary_startup_64 ffffffff810000f5 secondary_startup_64_no_verify Not a problem itself, but the self-extractor code has the address of that function hardcoded the beginning, not looking onto the ELF header, which always contains the address of startup_{32,64}(). So, instead of doing an "act of blind faith", just take the address from the ELF header and extract a relative offset to the entry point. The decompressor function already returns a pointer to the beginning of the kernel to the Asm code, which then jumps to it, so add that offset to the return value. This doesn't change anything for now, but allows to resign from the "head object list" for x86 and makes sure valid Kbuild or any other improvements won't break anything here in general. Signed-off-by: Alexander Lobakin Signed-off-by: Ingo Molnar Tested-by: Jiri Slaby Cc: "H. Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20230109170403.4117105-2-alexandr.lobakin@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 2 +- arch/x86/boot/compressed/misc.c | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 6589ddd4cfaf2..987ae727cf9f0 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -187,7 +187,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) leal boot_heap@GOTOFF(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call extract_kernel /* returns kernel location in %eax */ + call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp /* diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 96c61c9883c36..3a970a388adb8 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -580,7 +580,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) movl input_len(%rip), %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movl output_len(%rip), %r9d /* decompressed length, end of relocs */ - call extract_kernel /* returns kernel location in %rax */ + call extract_kernel /* returns kernel entry point in %rax */ popq %rsi /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cf690d8712f4e..014ff222bf4b3 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len, { } #endif -static void parse_elf(void *output) +static size_t parse_elf(void *output) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -293,10 +293,8 @@ static void parse_elf(void *output) if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG2] != ELFMAG2 || - ehdr.e_ident[EI_MAG3] != ELFMAG3) { + ehdr.e_ident[EI_MAG3] != ELFMAG3) error("Kernel is not a valid ELF file"); - return; - } debug_putstr("Parsing ELF... "); @@ -328,6 +326,8 @@ static void parse_elf(void *output) } free(phdrs); + + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } /* @@ -356,6 +356,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; unsigned long needed_size; + size_t entry_offset; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -456,14 +457,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); - parse_elf(output); + entry_offset = parse_elf(output); handle_relocations(output, output_len, virt_addr); - debug_putstr("done.\nBooting the kernel.\n"); + + debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); + debug_puthex(entry_offset); + debug_putstr(").\n"); /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); - return output; + return output + entry_offset; } void fortify_panic(const char *name) -- GitLab From 51a0710218cea5c7d5528b92ba19e964423c7f5a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:00 +0200 Subject: [PATCH 0532/1418] x86/efistub: Branch straight to kernel entry point from C code commit d2d7a54f69b67cd0a30e0ebb5307cb2de625baac upstream. Instead of returning to the calling code in assembler that does nothing more than perform an indirect call with the boot_params pointer in register ESI/RSI, perform the jump directly from the EFI stub C code. This will allow the asm entrypoint code to be dropped entirely in subsequent patches. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-4-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9ae0d6d0c285f..9422fddfbc8f1 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -279,7 +279,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) #define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) #define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) -void startup_32(struct boot_params *boot_params); +extern const u8 startup_32[], startup_64[]; static void setup_memory_protection(unsigned long image_base, unsigned long image_size) @@ -760,10 +760,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static void __noreturn enter_kernel(unsigned long kernel_addr, + struct boot_params *boot_params) +{ + /* enter decompressed kernel with boot_params pointer in RSI/ESI */ + asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params)); + + unreachable(); +} + /* - * On success, we return the address of startup_32, which has potentially been - * relocated by efi_relocate_kernel. - * On failure, we exit to the firmware via efi_exit instead of returning. + * On success, this routine will jump to the relocated image directly and never + * return. On failure, it will exit to the firmware via efi_exit() instead of + * returning. */ asmlinkage unsigned long efi_main(efi_handle_t handle, efi_system_table_t *sys_table_arg, @@ -905,7 +914,10 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, goto fail; } - return bzimage_addr; + if (IS_ENABLED(CONFIG_X86_64)) + bzimage_addr += startup_64 - startup_32; + + enter_kernel(bzimage_addr, boot_params); fail: efi_err("efi_main() failed!\n"); -- GitLab From 2cca5f519e3a967f4b5b72e69758521401f021eb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:04 +0200 Subject: [PATCH 0533/1418] x86/decompressor: Store boot_params pointer in callee save register commit 8b63cba746f86a754d66e302c43209cc9b9b6e39 upstream. Instead of pushing and popping %RSI several times to preserve the struct boot_params pointer across the execution of the startup code, move it into a callee save register before the first call into C, and copy it back when needed. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-8-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 42 ++++++++++++------------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 3a970a388adb8..bb268d165906b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -408,10 +408,14 @@ SYM_CODE_START(startup_64) lretq .Lon_kernel_cs: + /* + * RSI holds a pointer to a boot_params structure provided by the + * loader, and this needs to be preserved across C function calls. So + * move it into a callee saved register. + */ + movq %rsi, %r15 - pushq %rsi call load_stage1_idt - popq %rsi #ifdef CONFIG_AMD_MEM_ENCRYPT /* @@ -422,12 +426,10 @@ SYM_CODE_START(startup_64) * CPUID instructions being issued, so go ahead and do that now via * sev_enable(), which will also handle the rest of the SEV-related * detection/setup to ensure that has been done in advance of any dependent - * code. + * code. Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call sev_enable - popq %rsi #endif /* @@ -440,13 +442,10 @@ SYM_CODE_START(startup_64) * - Non zero RDX means trampoline needs to enable 5-level * paging. * - * RSI holds real mode data and needs to be preserved across - * this function call. + * Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call paging_prepare - popq %rsi /* Save the trampoline address in RCX */ movq %rax, %rcx @@ -459,9 +458,9 @@ SYM_CODE_START(startup_64) * because the architecture does not guarantee that GPRs will retain * their full 64-bit values across a 32-bit mode switch. */ + pushq %r15 pushq %rbp pushq %rbx - pushq %rsi /* * Push the 64-bit address of trampoline_return() onto the new stack. @@ -478,9 +477,9 @@ SYM_CODE_START(startup_64) lretq trampoline_return: /* Restore live 64-bit registers */ - popq %rsi popq %rbx popq %rbp + popq %r15 /* Restore the stack, the 32-bit trampoline uses its own stack */ leaq rva(boot_stack_end)(%rbx), %rsp @@ -490,14 +489,9 @@ trampoline_return: * * RDI is address of the page table to use instead of page table * in trampoline memory (if required). - * - * RSI holds real mode data and needs to be preserved across - * this function call. */ - pushq %rsi leaq rva(top_pgtable)(%rbx), %rdi call cleanup_trampoline - popq %rsi /* Zero EFLAGS */ pushq $0 @@ -507,7 +501,6 @@ trampoline_return: * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushq %rsi leaq (_bss-8)(%rip), %rsi leaq rva(_bss-8)(%rbx), %rdi movl $(_bss - startup_32), %ecx @@ -515,7 +508,6 @@ trampoline_return: std rep movsq cld - popq %rsi /* * The GDT may get overwritten either during the copy we just did or @@ -562,30 +554,28 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq - pushq %rsi call load_stage2_idt /* Pass boot_params to initialize_identity_maps() */ - movq (%rsp), %rdi + movq %r15, %rdi call initialize_identity_maps - popq %rsi /* * Do the extraction, and jump to the new kernel.. */ - pushq %rsi /* Save the real mode argument */ - movq %rsi, %rdi /* real mode address */ + /* pass struct boot_params pointer */ + movq %r15, %rdi leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ leaq input_data(%rip), %rdx /* input_data */ movl input_len(%rip), %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movl output_len(%rip), %r9d /* decompressed length, end of relocs */ call extract_kernel /* returns kernel entry point in %rax */ - popq %rsi /* * Jump to the decompressed kernel. */ + movq %r15, %rsi jmp *%rax SYM_FUNC_END(.Lrelocated) -- GitLab From 99a20f58913a4093c73817f5b364f0cf050a6d75 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:05 +0200 Subject: [PATCH 0534/1418] x86/decompressor: Assign paging related global variables earlier commit 00c6b0978ec182f1a672095930872168b9d5b1e2 upstream. There is no need to defer the assignment of the paging related global variables 'pgdir_shift' and 'ptrs_per_p4d' until after the trampoline is cleaned up, so assign them as soon as it is clear that 5-level paging will be enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-9-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.h | 2 -- arch/x86/boot/compressed/pgtable_64.c | 14 +++++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index a49d9219c06e5..b6e46435b90b8 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -170,9 +170,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } #endif /* ident_map_64.c */ -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; -#endif extern void kernel_add_identity_map(unsigned long start, unsigned long end); /* Used by PAGE_KERN* macros: */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 2ac12ff4111bf..f8092d3244c95 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -130,6 +130,11 @@ struct paging_config paging_prepare(void *rmode) native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { paging_config.l5_required = 1; + + /* Initialize variables for 5-level paging */ + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; } paging_config.trampoline_start = find_trampoline_placement(); @@ -206,13 +211,4 @@ void cleanup_trampoline(void *pgtable) /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); - - /* Initialize variables for 5-level paging */ -#ifdef CONFIG_X86_5LEVEL - if (__read_cr4() & X86_CR4_LA57) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - } -#endif } -- GitLab From 640f27fc2e7bd69d511675c0c62a90bd9ed977cb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:06 +0200 Subject: [PATCH 0535/1418] x86/decompressor: Call trampoline as a normal function commit e8972a76aa90c05a0078043413f806c02fcb3487 upstream. Move the long return to switch to 32-bit mode into the trampoline code so it can be called as an ordinary function. This will allow it to be called directly from C code in a subsequent patch. While at it, reorganize the code somewhat to keep the prologue and epilogue of the function together, making the code a bit easier to follow. Also, given that the trampoline is now entered in 64-bit mode, a simple RIP-relative reference can be used to take the address of the exit point. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-10-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 79 +++++++++++++----------------- arch/x86/boot/compressed/pgtable.h | 2 +- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index bb268d165906b..381ca6bf62d35 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -450,39 +450,8 @@ SYM_CODE_START(startup_64) /* Save the trampoline address in RCX */ movq %rax, %rcx - /* Set up 32-bit addressable stack */ - leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp - - /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. - */ - pushq %r15 - pushq %rbp - pushq %rbx - - /* - * Push the 64-bit address of trampoline_return() onto the new stack. - * It will be used by the trampoline to return to the main code. Due to - * the 32-bit mode switch, it cannot be kept it in a register either. - */ - leaq trampoline_return(%rip), %rdi - pushq %rdi - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - pushq %rax - lretq -trampoline_return: - /* Restore live 64-bit registers */ - popq %rbx - popq %rbp - popq %r15 - - /* Restore the stack, the 32-bit trampoline uses its own stack */ - leaq rva(boot_stack_end)(%rbx), %rsp + call *%rax /* * cleanup_trampoline() would restore trampoline memory. @@ -579,7 +548,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) jmp *%rax SYM_FUNC_END(.Lrelocated) - .code32 /* * This is the 32-bit trampoline that will be copied over to low memory. * @@ -588,6 +556,39 @@ SYM_FUNC_END(.Lrelocated) * Non zero RDX means trampoline needs to enable 5-level paging. */ SYM_CODE_START(trampoline_32bit_src) + /* + * Preserve live 64-bit registers on the stack: this is necessary + * because the architecture does not guarantee that GPRs will retain + * their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %rbp + pushq %rbx + + /* Set up 32-bit addressable stack and push the old RSP value */ + leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx + movq %rsp, (%rbx) + movq %rbx, %rsp + + /* Take the address of the trampoline exit code */ + leaq .Lret(%rip), %rbx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq + +.Lret: + /* Restore the preserved 64-bit registers */ + movq (%rsp), %rsp + popq %rbx + popq %rbp + popq %r15 + retq + + .code32 +0: /* Set up data and stack segments */ movl $__KERNEL_DS, %eax movl %eax, %ds @@ -651,12 +652,9 @@ SYM_CODE_START(trampoline_32bit_src) 1: movl %eax, %cr4 - /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - /* Prepare the stack for far return to Long Mode */ pushl $__KERNEL_CS - pushl %eax + pushl %ebx /* Enable paging again. */ movl %cr0, %eax @@ -666,12 +664,6 @@ SYM_CODE_START(trampoline_32bit_src) lret SYM_CODE_END(trampoline_32bit_src) - .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) - /* Return from the trampoline */ - retq -SYM_FUNC_END(.Lpaging_enabled) - /* * The trampoline code has a size limit. * Make sure we fail to compile if the trampoline code grows @@ -679,7 +671,6 @@ SYM_FUNC_END(.Lpaging_enabled) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - .code32 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index cc9b2529a0863..91dbb99203fbc 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -6,7 +6,7 @@ #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE -- GitLab From 6083b4c5908e0e6d1b578af04103f64c257ffb82 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:07 +0200 Subject: [PATCH 0536/1418] x86/decompressor: Use standard calling convention for trampoline commit 918a7a04e71745e99a0efc6753e587439b794b29 upstream. Update the trampoline code so its arguments are passed via RDI and RSI, which matches the ordinary SysV calling convention for x86_64. This will allow this code to be called directly from C. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-11-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 27 +++++++++++++-------------- arch/x86/boot/compressed/pgtable.h | 2 +- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 381ca6bf62d35..c28f7ef80ad27 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -447,9 +447,9 @@ SYM_CODE_START(startup_64) movq %r15, %rdi call paging_prepare - /* Save the trampoline address in RCX */ - movq %rax, %rcx - + /* Pass the trampoline address and boolean flag as args #1 and #2 */ + movq %rax, %rdi + movq %rdx, %rsi leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax call *%rax @@ -549,11 +549,14 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) SYM_FUNC_END(.Lrelocated) /* - * This is the 32-bit trampoline that will be copied over to low memory. + * This is the 32-bit trampoline that will be copied over to low memory. It + * will be called using the ordinary 64-bit calling convention from code + * running in 64-bit mode. * * Return address is at the top of the stack (might be above 4G). - * ECX contains the base address of the trampoline memory. - * Non zero RDX means trampoline needs to enable 5-level paging. + * The first argument (EDI) contains the 32-bit addressable base of the + * trampoline memory. A non-zero second argument (ESI) means that the + * trampoline needs to enable 5-level paging. */ SYM_CODE_START(trampoline_32bit_src) /* @@ -600,7 +603,7 @@ SYM_CODE_START(trampoline_32bit_src) movl %eax, %cr0 /* Check what paging mode we want to be in after the trampoline */ - testl %edx, %edx + testl %esi, %esi jz 1f /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ @@ -615,21 +618,17 @@ SYM_CODE_START(trampoline_32bit_src) jz 3f 2: /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax + leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax movl %eax, %cr3 3: /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ - pushl %ecx - pushl %edx movl $MSR_EFER, %ecx rdmsr btsl $_EFER_LME, %eax /* Avoid writing EFER if no change was made (for TDX guest) */ jc 1f wrmsr -1: popl %edx - popl %ecx - +1: #ifdef CONFIG_X86_MCE /* * Preserve CR4.MCE if the kernel will enable #MC support. @@ -646,7 +645,7 @@ SYM_CODE_START(trampoline_32bit_src) /* Enable PAE and LA57 (if required) paging modes */ orl $X86_CR4_PAE, %eax - testl %edx, %edx + testl %esi, %esi jz 1f orl $X86_CR4_LA57, %eax 1: diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 91dbb99203fbc..4e8cef135226b 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -14,7 +14,7 @@ extern unsigned long *trampoline_32bit; -extern void trampoline_32bit_src(void *return_ptr); +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ -- GitLab From 1523291591de054393ff4d732f18abd222ff5949 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:08 +0200 Subject: [PATCH 0537/1418] x86/decompressor: Avoid the need for a stack in the 32-bit trampoline commit bd328aa01ff77a45aeffea5fc4521854291db11f upstream. The 32-bit trampoline no longer uses the stack for anything except performing a far return back to long mode, and preserving the caller's stack pointer value. Currently, the trampoline stack is placed in the same page that carries the trampoline code, which means this page must be mapped writable and executable, and the stack is therefore executable as well. Replace the far return with a far jump, so that the return address can be pre-calculated and patched into the code before it is called. This removes the need for a 32-bit addressable stack entirely, and in a later patch, this will be taken advantage of by removing writable permissions from (and adding executable permissions to) the trampoline code page when booting via the EFI stub. Note that the value of RSP still needs to be preserved explicitly across the switch into 32-bit mode, as the register may get truncated to 32 bits. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-12-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 45 ++++++++++++++++----------- arch/x86/boot/compressed/pgtable.h | 4 +-- arch/x86/boot/compressed/pgtable_64.c | 12 ++++++- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c28f7ef80ad27..40848b817ac5b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -558,6 +558,7 @@ SYM_FUNC_END(.Lrelocated) * trampoline memory. A non-zero second argument (ESI) means that the * trampoline needs to enable 5-level paging. */ + .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) /* * Preserve live 64-bit registers on the stack: this is necessary @@ -568,13 +569,9 @@ SYM_CODE_START(trampoline_32bit_src) pushq %rbp pushq %rbx - /* Set up 32-bit addressable stack and push the old RSP value */ - leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx - movq %rsp, (%rbx) - movq %rbx, %rsp - - /* Take the address of the trampoline exit code */ - leaq .Lret(%rip), %rbx + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ + movq %rsp, %rbx + shrq $32, %rbx /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ pushq $__KERNEL32_CS @@ -582,9 +579,17 @@ SYM_CODE_START(trampoline_32bit_src) pushq %rax lretq + /* + * The 32-bit code below will do a far jump back to long mode and end + * up here after reconfiguring the number of paging levels. First, the + * stack pointer needs to be restored to its full 64-bit value before + * the callee save register contents can be popped from the stack. + */ .Lret: + shlq $32, %rbx + orq %rbx, %rsp + /* Restore the preserved 64-bit registers */ - movq (%rsp), %rsp popq %rbx popq %rbp popq %r15 @@ -592,11 +597,6 @@ SYM_CODE_START(trampoline_32bit_src) .code32 0: - /* Set up data and stack segments */ - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %ss - /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -651,18 +651,26 @@ SYM_CODE_START(trampoline_32bit_src) 1: movl %eax, %cr4 - /* Prepare the stack for far return to Long Mode */ - pushl $__KERNEL_CS - pushl %ebx - /* Enable paging again. */ movl %cr0, %eax btsl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - lret + /* + * Return to the 64-bit calling code using LJMP rather than LRET, to + * avoid the need for a 32-bit addressable stack. The destination + * address will be adjusted after the template code is copied into a + * 32-bit addressable buffer. + */ +.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) SYM_CODE_END(trampoline_32bit_src) +/* + * This symbol is placed right after trampoline_32bit_src() so its address can + * be used to infer the size of the trampoline code. + */ +SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) + /* * The trampoline code has a size limit. * Make sure we fail to compile if the trampoline code grows @@ -670,6 +678,7 @@ SYM_CODE_END(trampoline_32bit_src) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE + .text SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 4e8cef135226b..c6b0903aded05 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -8,13 +8,13 @@ #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE #define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 -#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE - #ifndef __ASSEMBLER__ extern unsigned long *trampoline_32bit; extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); +extern const u16 trampoline_ljmp_imm_offset; + #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index f8092d3244c95..5198a05aefa8d 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -109,6 +109,7 @@ static unsigned long find_trampoline_placement(void) struct paging_config paging_prepare(void *rmode) { struct paging_config paging_config = {}; + void *tramp_code; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ boot_params = rmode; @@ -148,9 +149,18 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + tramp_code = memcpy(trampoline_32bit + + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + /* + * Avoid the need for a stack in the 32-bit trampoline code, by using + * LJMP rather than LRET to return back to long mode. LJMP takes an + * immediate absolute address, which needs to be adjusted based on the + * placement of the trampoline. + */ + *(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code; + /* * The code below prepares page table in trampoline memory. * -- GitLab From 364d7745974f20ed940918e3129d10c271638153 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:09 +0200 Subject: [PATCH 0538/1418] x86/decompressor: Call trampoline directly from C code commit 64ef578b6b6866bec012544416946533444036c8 upstream. Instead of returning to the asm calling code to invoke the trampoline, call it straight from the C code that sets it up. That way, the struct return type is no longer needed for returning two values, and the call can be made conditional more cleanly in a subsequent patch. This means that all callee save 64-bit registers need to be preserved and restored, as their contents may not survive the legacy mode switch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-13-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 31 +++++++++++--------------- arch/x86/boot/compressed/pgtable_64.c | 32 +++++++++++---------------- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 40848b817ac5b..7d0cf029c8246 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -433,25 +433,14 @@ SYM_CODE_START(startup_64) #endif /* - * paging_prepare() sets up the trampoline and checks if we need to - * enable 5-level paging. - * - * paging_prepare() returns a two-quadword structure which lands - * into RDX:RAX: - * - Address of the trampoline is returned in RAX. - * - Non zero RDX means trampoline needs to enable 5-level - * paging. + * configure_5level_paging() updates the number of paging levels using + * a trampoline in 32-bit addressable memory if the current number does + * not match the desired number. * * Pass the boot_params pointer as the first argument. */ movq %r15, %rdi - call paging_prepare - - /* Pass the trampoline address and boolean flag as args #1 and #2 */ - movq %rax, %rdi - movq %rdx, %rsi - leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - call *%rax + call configure_5level_paging /* * cleanup_trampoline() would restore trampoline memory. @@ -561,11 +550,14 @@ SYM_FUNC_END(.Lrelocated) .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. + * Preserve callee save 64-bit registers on the stack: this is + * necessary because the architecture does not guarantee that GPRs will + * retain their full 64-bit values across a 32-bit mode switch. */ pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 pushq %rbp pushq %rbx @@ -592,6 +584,9 @@ SYM_CODE_START(trampoline_32bit_src) /* Restore the preserved 64-bit registers */ popq %rbx popq %rbp + popq %r12 + popq %r13 + popq %r14 popq %r15 retq diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 5198a05aefa8d..f9cc86b2ee55c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39; unsigned int __section(".data") ptrs_per_p4d = 1; #endif -struct paging_config { - unsigned long trampoline_start; - unsigned long l5_required; -}; - /* Buffer to preserve trampoline memory */ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; @@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; * purposes. * * Avoid putting the pointer into .bss as it will be cleared between - * paging_prepare() and extract_kernel(). + * configure_5level_paging() and extract_kernel(). */ unsigned long *trampoline_32bit __section(".data"); @@ -106,13 +101,13 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -struct paging_config paging_prepare(void *rmode) +asmlinkage void configure_5level_paging(struct boot_params *bp) { - struct paging_config paging_config = {}; - void *tramp_code; + void (*toggle_la57)(void *trampoline, bool enable_5lvl); + bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = rmode; + boot_params = bp; /* * Check if LA57 is desired and supported. @@ -130,7 +125,7 @@ struct paging_config paging_prepare(void *rmode) !cmdline_find_option_bool("no5lvl") && native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { - paging_config.l5_required = 1; + l5_required = true; /* Initialize variables for 5-level paging */ __pgtable_l5_enabled = 1; @@ -138,9 +133,7 @@ struct paging_config paging_prepare(void *rmode) ptrs_per_p4d = 512; } - paging_config.trampoline_start = find_trampoline_placement(); - - trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); @@ -149,7 +142,7 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - tramp_code = memcpy(trampoline_32bit + + toggle_la57 = memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); @@ -159,7 +152,8 @@ struct paging_config paging_prepare(void *rmode) * immediate absolute address, which needs to be adjusted based on the * placement of the trampoline. */ - *(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code; + *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) += + (unsigned long)toggle_la57; /* * The code below prepares page table in trampoline memory. @@ -175,10 +169,10 @@ struct paging_config paging_prepare(void *rmode) * We are not going to use the page table in trampoline memory if we * are already in the desired paging mode. */ - if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) goto out; - if (paging_config.l5_required) { + if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. @@ -201,7 +195,7 @@ struct paging_config paging_prepare(void *rmode) } out: - return paging_config; + toggle_la57(trampoline_32bit, l5_required); } void cleanup_trampoline(void *pgtable) -- GitLab From e2fa53a04cc722aaaedbd91cd414d170067fb09d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:10 +0200 Subject: [PATCH 0539/1418] x86/decompressor: Only call the trampoline when changing paging levels commit f97b67a773cd84bd8b55c0a0ec32448a87fc56bb upstream. Since the current and desired number of paging levels are known when the trampoline is being prepared, avoid calling the trampoline at all if it is clear that calling it is not going to result in a change to the number of paging levels. Given that the CPU is already running in long mode, the PAE and LA57 settings are necessarily consistent with the currently active page tables, and other fields in CR4 will be initialized by the startup code in the kernel proper. So limit the manipulation of CR4 to toggling the LA57 bit, which is the only thing that really needs doing at this point in the boot. This also means that there is no need to pass the value of l5_required to toggle_la57(), as it will not be called unless CR4.LA57 needs to toggle. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-14-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 45 +++------------------------ arch/x86/boot/compressed/pgtable_64.c | 22 ++++++------- 2 files changed, 13 insertions(+), 54 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 7d0cf029c8246..4645d7b66b1f1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -390,10 +390,6 @@ SYM_CODE_START(startup_64) * For the trampoline, we need the top page table to reside in lower * memory as we don't have a way to load 64-bit values into CR3 in * 32-bit mode. - * - * We go though the trampoline even if we don't have to: if we're - * already in a desired paging mode. This way the trampoline code gets - * tested on every boot. */ /* Make sure we have GDT with 32-bit code segment */ @@ -544,8 +540,7 @@ SYM_FUNC_END(.Lrelocated) * * Return address is at the top of the stack (might be above 4G). * The first argument (EDI) contains the 32-bit addressable base of the - * trampoline memory. A non-zero second argument (ESI) means that the - * trampoline needs to enable 5-level paging. + * trampoline memory. */ .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) @@ -597,25 +592,10 @@ SYM_CODE_START(trampoline_32bit_src) btrl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - /* Check what paging mode we want to be in after the trampoline */ - testl %esi, %esi - jz 1f - - /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jnz 3f - jmp 2f -1: - /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jz 3f -2: /* Point CR3 to the trampoline's new top level page table */ leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax movl %eax, %cr3 -3: + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ movl $MSR_EFER, %ecx rdmsr @@ -624,26 +604,9 @@ SYM_CODE_START(trampoline_32bit_src) jc 1f wrmsr 1: -#ifdef CONFIG_X86_MCE - /* - * Preserve CR4.MCE if the kernel will enable #MC support. - * Clearing MCE may fault in some environments (that also force #MC - * support). Any machine check that occurs before #MC support is fully - * configured will crash the system regardless of the CR4.MCE value set - * here. - */ + /* Toggle CR4.LA57 */ movl %cr4, %eax - andl $X86_CR4_MCE, %eax -#else - movl $0, %eax -#endif - - /* Enable PAE and LA57 (if required) paging modes */ - orl $X86_CR4_PAE, %eax - testl %esi, %esi - jz 1f - orl $X86_CR4_LA57, %eax -1: + btcl $X86_CR4_LA57_BIT, %eax movl %eax, %cr4 /* Enable paging again. */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index f9cc86b2ee55c..4213473ae5488 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -103,7 +103,7 @@ static unsigned long find_trampoline_placement(void) asmlinkage void configure_5level_paging(struct boot_params *bp) { - void (*toggle_la57)(void *trampoline, bool enable_5lvl); + void (*toggle_la57)(void *trampoline); bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ @@ -133,6 +133,13 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) ptrs_per_p4d = 512; } + /* + * The trampoline will not be used if the paging mode is already set to + * the desired one. + */ + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + return; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ @@ -160,18 +167,8 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) * * The new page table will be used by trampoline code for switching * from 4- to 5-level paging or vice versa. - * - * If switching is not required, the page table is unused: trampoline - * code wouldn't touch CR3. */ - /* - * We are not going to use the page table in trampoline memory if we - * are already in the desired paging mode. - */ - if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) - goto out; - if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as @@ -194,8 +191,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) (void *)src, PAGE_SIZE); } -out: - toggle_la57(trampoline_32bit, l5_required); + toggle_la57(trampoline_32bit); } void cleanup_trampoline(void *pgtable) -- GitLab From df3dec320b7c14780484e824f3ec9c213e4996e1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:11 +0200 Subject: [PATCH 0540/1418] x86/decompressor: Pass pgtable address to trampoline directly commit cb83cece57e1889109dd73ea08ee338668c9d1b8 upstream. The only remaining use of the trampoline address by the trampoline itself is deriving the page table address from it, and this involves adding an offset of 0x0. So simplify this, and pass the new CR3 value directly. This makes the fact that the page table happens to be at the start of the trampoline allocation an implementation detail of the caller. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-15-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/boot/compressed/pgtable.h | 2 -- arch/x86/boot/compressed/pgtable_64.c | 9 ++++----- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4645d7b66b1f1..148b349c193eb 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -539,8 +539,9 @@ SYM_FUNC_END(.Lrelocated) * running in 64-bit mode. * * Return address is at the top of the stack (might be above 4G). - * The first argument (EDI) contains the 32-bit addressable base of the - * trampoline memory. + * The first argument (EDI) contains the address of the temporary PGD level + * page table in 32-bit addressable memory which will be programmed into + * register CR3. */ .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) @@ -593,8 +594,7 @@ SYM_CODE_START(trampoline_32bit_src) movl %eax, %cr0 /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax - movl %eax, %cr3 + movl %edi, %cr3 /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ movl $MSR_EFER, %ecx diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index c6b0903aded05..6d595abe06b34 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -3,8 +3,6 @@ #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) -#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 - #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE #define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 4213473ae5488..eab4e6b568ae0 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -103,7 +103,7 @@ static unsigned long find_trampoline_placement(void) asmlinkage void configure_5level_paging(struct boot_params *bp) { - void (*toggle_la57)(void *trampoline); + void (*toggle_la57)(void *cr3); bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ @@ -174,7 +174,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. */ - trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; } else { unsigned long src; @@ -187,8 +187,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) * may be above 4G. */ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), - (void *)src, PAGE_SIZE); + memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); } toggle_la57(trampoline_32bit); @@ -198,7 +197,7 @@ void cleanup_trampoline(void *pgtable) { void *trampoline_pgtable; - trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); + trampoline_pgtable = trampoline_32bit; /* * Move the top level page table out of trampoline memory, -- GitLab From 463b51e90c576cd63269f8420c0a0b09152092e5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:12 +0200 Subject: [PATCH 0541/1418] x86/decompressor: Merge trampoline cleanup with switching code commit 03dda95137d3247564854ad9032c0354273a159d upstream. Now that the trampoline setup code and the actual invocation of it are all done from the C routine, the trampoline cleanup can be merged into it as well, instead of returning to asm just to call another C function. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-16-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 14 ++++---------- arch/x86/boot/compressed/pgtable_64.c | 18 ++++-------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 148b349c193eb..81458f77131b2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -433,20 +433,14 @@ SYM_CODE_START(startup_64) * a trampoline in 32-bit addressable memory if the current number does * not match the desired number. * - * Pass the boot_params pointer as the first argument. + * Pass the boot_params pointer as the first argument. The second + * argument is the relocated address of the page table to use instead + * of the page table in trampoline memory (if required). */ movq %r15, %rdi + leaq rva(top_pgtable)(%rbx), %rsi call configure_5level_paging - /* - * cleanup_trampoline() would restore trampoline memory. - * - * RDI is address of the page table to use instead of page table - * in trampoline memory (if required). - */ - leaq rva(top_pgtable)(%rbx), %rdi - call cleanup_trampoline - /* Zero EFLAGS */ pushq $0 popfq diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index eab4e6b568ae0..7939eb6e6ce9b 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -101,7 +101,7 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -asmlinkage void configure_5level_paging(struct boot_params *bp) +asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) { void (*toggle_la57)(void *cr3); bool l5_required = false; @@ -191,22 +191,12 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) } toggle_la57(trampoline_32bit); -} - -void cleanup_trampoline(void *pgtable) -{ - void *trampoline_pgtable; - - trampoline_pgtable = trampoline_32bit; /* - * Move the top level page table out of trampoline memory, - * if it's there. + * Move the top level page table out of trampoline memory. */ - if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)pgtable); - } + memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); -- GitLab From 5c4feadb0011983bbc4587bc61056c7b379d9969 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:16 +0200 Subject: [PATCH 0542/1418] x86/decompressor: Move global symbol references to C code commit 24388292e2d7fae79a0d4183cc91716b851299cf upstream. It is no longer necessary to be cautious when referring to global variables in the position independent decompressor code, now that it is built using PIE codegen and makes an assertion in the linker script that no GOT entries exist (which would require adjustment for the actual runtime load address of the decompressor binary). This means global variables can be referenced directly from C code, instead of having to pass their runtime addresses into C routines from asm code, which needs to happen at each call site. Do so for the code that will be called directly from the EFI stub after a subsequent patch, and avoid the need to duplicate this logic a third time. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-20-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_32.S | 8 -------- arch/x86/boot/compressed/head_64.S | 10 ++-------- arch/x86/boot/compressed/misc.c | 16 +++++++++------- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 987ae727cf9f0..3ecc1bbe971e1 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -179,13 +179,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) */ /* push arguments for extract_kernel: */ - pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ pushl %ebp /* output address */ - pushl input_len@GOTOFF(%ebx) /* input_len */ - leal input_data@GOTOFF(%ebx), %eax - pushl %eax /* input_data */ - leal boot_heap@GOTOFF(%ebx), %eax - pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp @@ -213,8 +207,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) */ .bss .balign 4 -boot_heap: - .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 81458f77131b2..fafd0a59f3961 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -511,13 +511,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* * Do the extraction, and jump to the new kernel.. */ - /* pass struct boot_params pointer */ + /* pass struct boot_params pointer and output target address */ movq %r15, %rdi - leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ - leaq input_data(%rip), %rdx /* input_data */ - movl input_len(%rip), %ecx /* input_len */ - movq %rbp, %r8 /* output target address */ - movl output_len(%rip), %r9d /* decompressed length, end of relocs */ + movq %rbp, %rsi call extract_kernel /* returns kernel entry point in %rax */ /* @@ -675,8 +671,6 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) */ .bss .balign 4 -SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) - SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 .balign 16 diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 014ff222bf4b3..e4e3e49fcc374 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,11 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); + +extern unsigned char input_data[]; +extern unsigned int input_len, output_len; + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -347,14 +352,11 @@ static size_t parse_elf(void *output) * |-------uncompressed kernel image---------| * */ -asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, - unsigned char *input_data, - unsigned long input_len, - unsigned char *output, - unsigned long output_len) +asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + memptr heap = (memptr)boot_heap; unsigned long needed_size; size_t entry_offset; @@ -412,7 +414,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entries. This ensures the full mapped area is usable RAM * and doesn't include any reserved areas. */ - needed_size = max(output_len, kernel_total_size); + needed_size = max_t(unsigned long, output_len, kernel_total_size); #ifdef CONFIG_X86_64 needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); #endif @@ -443,7 +445,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); - if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) -- GitLab From bf0ca988e250af95824c121873b2f76fccfc91df Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:15 +0200 Subject: [PATCH 0543/1418] decompress: Use 8 byte alignment commit 8217ad0a435ff06d651d7298ea8ae8d72388179e upstream. The ZSTD decompressor requires malloc() allocations to be 8 byte aligned, so ensure that this the case. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-19-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/decompress/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 9192986b1a731..ac862422df158 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) if (!malloc_ptr) malloc_ptr = free_mem_ptr; - malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ p = (void *)malloc_ptr; malloc_ptr += size; -- GitLab From 04dd4403ff3721ad0bff925116fada773ed6ae69 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Nov 2023 17:34:07 -0500 Subject: [PATCH 0544/1418] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml commit 5b750b22530fe53bf7fd6a30baacd53ada26911b upstream. Does the same thing as: commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") Reviewed-by: Harry Wentland Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/ Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40") Signed-off-by: Alex Deucher Cc: Alvin Lee Cc: Hamza Mahfooz Cc: Samson Tam Cc: Harry Wentland Cc: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 6fdf87a6e240f..6c7b286e1123d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -51,8 +51,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) -- GitLab From 831e9e63cc3b90f62d82df854cda8232408526a9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Feb 2024 10:25:49 +1100 Subject: [PATCH 0545/1418] NFS: Fix data corruption caused by congestion. when AOP_WRITEPAGE_ACTIVATE is returned (as NFS does when it detects congestion) it is important that the page is redirtied. nfs_writepage_locked() doesn't do this, so files can become corrupted as writes can be lost. Note that this is not needed in v6.8 as AOP_WRITEPAGE_ACTIVATE cannot be returned. It is needed for kernels v5.18..v6.7. From 6.3 onward the patch is different as it needs to mention "folio", not "page". Reported-and-tested-by: Jacek Tomaka Fixes: 6df25e58532b ("nfs: remove reliance on bdi congestion") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f41d24b54fd1f..6a06066684172 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page, int err; if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) + NFS_SERVER(inode)->write_congested) { + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, 0, -- GitLab From d03a9855cbe6f41b2928c4df2e33e05f32a8e7fa Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 13 Sep 2022 14:01:51 -0400 Subject: [PATCH 0546/1418] NFSD: Simplify READ_PLUS [ Upstream commit eeadcb75794516839078c28b3730132aeb700ce6 ] Chuck had suggested reverting READ_PLUS so it returns a single DATA segment covering the requested read range. This prepares the server for a future "sparse read" function so support can easily be added without needing to rip out the old READ_PLUS code at the same time. Signed-off-by: Anna Schumaker Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 139 +++++++++++----------------------------------- 1 file changed, 32 insertions(+), 107 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 89a579be042e5..51a598ee68fe1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4777,79 +4777,37 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof, - loff_t *pos) + struct nfsd4_read *read) { - struct xdr_stream *xdr = resp->xdr; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); struct file *file = read->rd_nf->nf_file; - int starting_len = xdr->buf->len; - loff_t hole_pos; - __be32 nfserr; - __be32 *p, tmp; - __be64 tmp64; - - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); - if (hole_pos > read->rd_offset) - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); + struct xdr_stream *xdr = resp->xdr; + unsigned long maxcount; + __be32 nfserr, *p; /* Content type, offset, byte count */ p = xdr_reserve_space(xdr, 4 + 8 + 4); if (!p) - return nfserr_resource; + return nfserr_io; + if (resp->xdr->buf->page_len && splice_ok) { + WARN_ON_ONCE(splice_ok); + return nfserr_serverfault; + } - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); - if (read->rd_vlen < 0) - return nfserr_resource; + maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); + if (file->f_op->splice_read && splice_ok) + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); - - tmp = htonl(NFS4_CONTENT_DATA); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp64 = cpu_to_be64(read->rd_offset); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); - tmp = htonl(*maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); - - tmp = xdr_zero; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, - xdr_pad_size(*maxcount)); - return nfs_ok; -} - -static __be32 -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof) -{ - struct file *file = read->rd_nf->nf_file; - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); - loff_t f_size = i_size_read(file_inode(file)); - unsigned long count; - __be32 *p; - - if (data_pos == -ENXIO) - data_pos = f_size; - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); - count = data_pos - read->rd_offset; - /* Content type, offset, byte count */ - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); - if (!p) - return nfserr_resource; - - *p++ = htonl(NFS4_CONTENT_HOLE); + *p++ = cpu_to_be32(NFS4_CONTENT_DATA); p = xdr_encode_hyper(p, read->rd_offset); - p = xdr_encode_hyper(p, count); + *p = cpu_to_be32(read->rd_length); - *eof = (read->rd_offset + count) >= f_size; - *maxcount = min_t(unsigned long, count, *maxcount); return nfs_ok; } @@ -4857,69 +4815,36 @@ static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { - unsigned long maxcount, count; + struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - struct file *file; int starting_len = xdr->buf->len; - int last_segment = xdr->buf->len; - int segments = 0; - __be32 *p, tmp; - bool is_data; - loff_t pos; - u32 eof; + u32 segments = 0; + __be32 *p; if (nfserr) return nfserr; - file = read->rd_nf->nf_file; /* eof flag, segment count */ p = xdr_reserve_space(xdr, 4 + 4); if (!p) - return nfserr_resource; + return nfserr_io; xdr_commit_encode(xdr); - maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); - count = maxcount; - - eof = read->rd_offset >= i_size_read(file_inode(file)); - if (eof) + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); + if (read->rd_eof) goto out; - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); - is_data = pos > read->rd_offset; - - while (count > 0 && !eof) { - maxcount = count; - if (is_data) - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, - segments == 0 ? &pos : NULL); - else - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); - if (nfserr) - goto out; - count -= maxcount; - read->rd_offset += maxcount; - is_data = !is_data; - last_segment = xdr->buf->len; - segments++; - } - -out: - if (nfserr && segments == 0) + nfserr = nfsd4_encode_read_plus_data(resp, read); + if (nfserr) { xdr_truncate_encode(xdr, starting_len); - else { - if (nfserr) { - xdr_truncate_encode(xdr, last_segment); - nfserr = nfs_ok; - eof = 0; - } - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + return nfserr; } + segments++; + +out: + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(segments); return nfserr; } -- GitLab From 0a49efb94888b6381d9c43fda17115ffda40a039 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Oct 2022 21:24:23 +0100 Subject: [PATCH 0547/1418] NFSD: Remove redundant assignment to variable host_err [ Upstream commit 69eed23baf877bbb1f14d7f4df54f89807c9ee2a ] Variable host_err is assigned a value that is never read, it is being re-assigned a value in every different execution path in the following switch statement. The assignment is redundant and can be removed. Cleans up clang-scan warning: warning: Value stored to 'host_err' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eccc6ce55a63a..fce7a35a5e64d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1317,7 +1317,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); err = 0; - host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); -- GitLab From bfef0cfab41cb4894bc5cf8b93e76327ac04b9b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Oct 2022 07:47:54 -0400 Subject: [PATCH 0548/1418] nfsd: ignore requests to disable unsupported versions [ Upstream commit 8e823bafff2308753d430566256c83d8085952da ] The kernel currently errors out if you attempt to enable or disable a version that it doesn't recognize. Change it to ignore attempts to disable an unrecognized version. If we don't support it, then there is no harm in doing so. Signed-off-by: Jeff Layton Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 573de0d49e172..c21f5815d7264 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -601,7 +601,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } break; default: - return -EINVAL; + /* Ignore requests to disable non-existent versions */ + if (cmd == NFSD_SET) + return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); -- GitLab From 850333a25aab582118d9fa405af00caae32faa62 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Oct 2022 07:47:55 -0400 Subject: [PATCH 0549/1418] nfsd: move nfserrno() to vfs.c [ Upstream commit cb12fae1c34b1fa7eaae92c5aadc72d86d7fae19 ] nfserrno() is common to all nfs versions, but nfsproc.c is specifically for NFSv2. Move it to vfs.c, and the prototype to vfs.h. While we're in here, remove the #ifdef EDQUOT check in this function. It's apparently a holdover from the initial merge of the nfsd code in 1997. No other place in the kernel checks that that symbol is defined before using it, so I think we can dispense with it here. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/blocklayout.c | 1 + fs/nfsd/blocklayoutxdr.c | 1 + fs/nfsd/export.h | 1 - fs/nfsd/flexfilelayout.c | 1 + fs/nfsd/nfs4idmap.c | 1 + fs/nfsd/nfsproc.c | 62 --------------------------------------- fs/nfsd/vfs.c | 63 ++++++++++++++++++++++++++++++++++++++++ fs/nfsd/vfs.h | 1 + 8 files changed, 68 insertions(+), 63 deletions(-) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e7e6e78d965db..01d7fd108cf3d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -12,6 +12,7 @@ #include "blocklayoutxdr.h" #include "pnfs.h" #include "filecache.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 2455dc8be18a8..1ed2f691ebb90 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -9,6 +9,7 @@ #include "nfsd.h" #include "blocklayoutxdr.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index ee0e3aba4a6e5..d03f7f6a8642d 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); static inline void exp_put(struct svc_export *exp) { diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index 070f90ed09b61..3ca5304440ff0 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -15,6 +15,7 @@ #include "flexfilelayoutxdr.h" #include "pnfs.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e70a1a2999b7b..5e9809aff37eb 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,6 +41,7 @@ #include "idmap.h" #include "nfsd.h" #include "netns.h" +#include "vfs.h" /* * Turn off idmapping when using AUTH_SYS. diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 82b3ddeacc338..52fc222c34f26 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = { .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS2_SVC_XDRSIZE, }; - -/* - * Map errnos to NFS errnos. - */ -__be32 -nfserrno (int errno) -{ - static struct { - __be32 nfserr; - int syserr; - } nfs_errtbl[] = { - { nfs_ok, 0 }, - { nfserr_perm, -EPERM }, - { nfserr_noent, -ENOENT }, - { nfserr_io, -EIO }, - { nfserr_nxio, -ENXIO }, - { nfserr_fbig, -E2BIG }, - { nfserr_stale, -EBADF }, - { nfserr_acces, -EACCES }, - { nfserr_exist, -EEXIST }, - { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, - { nfserr_nodev, -ENODEV }, - { nfserr_notdir, -ENOTDIR }, - { nfserr_isdir, -EISDIR }, - { nfserr_inval, -EINVAL }, - { nfserr_fbig, -EFBIG }, - { nfserr_nospc, -ENOSPC }, - { nfserr_rofs, -EROFS }, - { nfserr_mlink, -EMLINK }, - { nfserr_nametoolong, -ENAMETOOLONG }, - { nfserr_notempty, -ENOTEMPTY }, -#ifdef EDQUOT - { nfserr_dquot, -EDQUOT }, -#endif - { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_jukebox, -EAGAIN }, - { nfserr_jukebox, -EWOULDBLOCK }, - { nfserr_jukebox, -ENOMEM }, - { nfserr_io, -ETXTBSY }, - { nfserr_notsupp, -EOPNOTSUPP }, - { nfserr_toosmall, -ETOOSMALL }, - { nfserr_serverfault, -ESERVERFAULT }, - { nfserr_serverfault, -ENFILE }, - { nfserr_io, -EREMOTEIO }, - { nfserr_stale, -EOPENSTALE }, - { nfserr_io, -EUCLEAN }, - { nfserr_perm, -ENOKEY }, - { nfserr_no_grace, -ENOGRACE}, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { - if (nfs_errtbl[i].syserr == errno) - return nfs_errtbl[i].nfserr; - } - WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); - return nfserr_io; -} - diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fce7a35a5e64d..5d6a61d47a905 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -49,6 +49,69 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +/** + * nfserrno - Map Linux errnos to NFS errnos + * @errno: POSIX(-ish) error code to be mapped + * + * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If + * it's an error we don't expect, log it once and return nfserr_io. + */ +__be32 +nfserrno (int errno) +{ + static struct { + __be32 nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, + { nfserr_dquot, -EDQUOT }, + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, + { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, + { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -ENOGRACE}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9744b041105b5..dbdfef7ae85bb 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) posix_acl_release(attrs->na_dpacl); } +__be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, -- GitLab From f82865e2a026b6d491377e64ad18326a413e6421 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Oct 2022 07:47:56 -0400 Subject: [PATCH 0550/1418] nfsd: allow disabling NFSv2 at compile time [ Upstream commit 2f3a4b2ac2f28b9be78ad21f401f31e263845214 ] rpc.nfsd stopped supporting NFSv2 a year ago. Take the next logical step toward deprecating it and allow NFSv2 support to be compiled out. Add a new CONFIG_NFSD_V2 option that can be turned off and rework the CONFIG_NFSD_V?_ACL option dependencies. Add a description that discourages enabling it. Also, change the description of CONFIG_NFSD to state that the always-on version is now 3 instead of 2. Finally, add an #ifdef around "case 2:" in __write_versions. When NFSv2 is disabled at compile time, this should make the kernel ignore attempts to disable it at runtime, but still error out when trying to enable it. Signed-off-by: Jeff Layton Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/Kconfig | 19 +++++++++++++++---- fs/nfsd/Makefile | 5 +++-- fs/nfsd/nfsctl.c | 2 ++ fs/nfsd/nfsd.h | 3 +-- fs/nfsd/nfssvc.c | 6 ++++++ 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f6a2fd3015e75..7c441f2bd4440 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -8,6 +8,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER help Choose Y here if you want to allow other computers to access @@ -26,19 +27,29 @@ config NFSD Below you can choose which versions of the NFS protocol are available to clients mounting the NFS server on this system. - Support for NFS version 2 (RFC 1094) is always available when + Support for NFS version 3 (RFC 1813) is always available when CONFIG_NFSD is selected. If unsure, say N. -config NFSD_V2_ACL - bool +config NFSD_V2 + bool "NFS server support for NFS version 2 (DEPRECATED)" depends on NFSD + default n + help + NFSv2 (RFC 1094) was the first publicly-released version of NFS. + Unless you are hosting ancient (1990's era) NFS clients, you don't + need this. + + If unsure, say N. + +config NFSD_V2_ACL + bool "NFS server support for the NFSv2 ACL protocol extension" + depends on NFSD_V2 config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD - select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 805c06d5f1b4b..6fffc8f03f740 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o # this one should be compiled first, as the tracing macros can easily blow up nfsd-y += trace.o -nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ - export.o auth.o lockd.o nfscache.o nfsxdr.o \ +nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o \ stats.o filecache.o nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c21f5815d7264..a8884d0b4638c 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { +#ifdef CONFIG_NFSD_V2 case 2: +#endif case 3: nfsd_vers(nn, num, cmd); break; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 09726c5b9a317..93b42ef9ed91b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -64,8 +64,7 @@ struct readdir_cd { extern struct svc_program nfsd_program; -extern const struct svc_version nfsd_version2, nfsd_version3, - nfsd_version4; +extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7695ebd28dc3..6f4a38f5ab0ce 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { +# if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, +# endif +# if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, +# endif }; #define NFSD_ACL_MINVERS 2 @@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = { #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { +#if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, +#endif [3] = &nfsd_version3, #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, -- GitLab From 137d20da8ea0daa9e0a2787acc4b66261e8796df Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 21 Oct 2022 14:24:14 +0200 Subject: [PATCH 0551/1418] exportfs: use pr_debug for unreachable debug statements [ Upstream commit 427505ffeaa464f683faba945a88d3e3248f6979 ] expfs.c has a bunch of dprintk statements which are unusable due to: #define dprintk(fmt, args...) do{}while(0) Use pr_debug so that they can be enabled dynamically. Also make some minor changes to the debug statements to fix some incorrect types, and remove __func__ which can be handled by dynamic debug separately. Signed-off-by: David Disseldorp Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/exportfs/expfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c648a493faf23..3204bd33e4e8a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -18,7 +18,7 @@ #include #include -#define dprintk(fmt, args...) do{}while(0) +#define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + dprintk("get_parent of %lu failed, err %ld\n", + dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } @@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { - dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } -- GitLab From e62d8c1281662a0cff23df2948162c1fe705d613 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Nov 2022 13:30:46 -0400 Subject: [PATCH 0552/1418] NFSD: Flesh out a documenting comment for filecache.c [ Upstream commit b3276c1f5b268ff56622e9e125b792b4c3dc03ac ] Record what we've learned recently about the NFSD filecache in a documenting comment so our future selves don't forget what all this is for. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/filecache.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 5cb8cce153a57..24ed511ed0d38 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -2,6 +2,30 @@ * Open file cache. * * (c) 2015 - Jeff Layton + * + * An nfsd_file object is a per-file collection of open state that binds + * together: + * - a struct file * + * - a user credential + * - a network namespace + * - a read-ahead context + * - monitoring for writeback errors + * + * nfsd_file objects are reference-counted. Consumers acquire a new + * object via the nfsd_file_acquire API. They manage their interest in + * the acquired object, and hence the object's reference count, via + * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file + * object: + * + * * non-garbage-collected: When a consumer wants to precisely control + * the lifetime of a file's open state, it acquires a non-garbage- + * collected nfsd_file. The final nfsd_file_put releases the open + * state immediately. + * + * * garbage-collected: When a consumer does not control the lifetime + * of open state, it acquires a garbage-collected nfsd_file. The + * final nfsd_file_put allows the open state to linger for a period + * during which it may be re-used. */ #include -- GitLab From 519a80ea5a1770f1bb7d0627f4670ca1c1767f80 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:57 -0400 Subject: [PATCH 0553/1418] NFSD: Clean up nfs4_preprocess_stateid_op() call sites [ Upstream commit eeff73f7c1c583f79a401284f46c619294859310 ] Remove the lame-duck dprintk()s around nfs4_preprocess_stateid_op() call sites. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a9105e95b59c5..ba53cd89ec62c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, &read->rd_nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); - goto out; - } - status = nfs_ok; -out: + read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, WR_STATE, NULL, NULL); - if (status) { - dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + if (status) return status; - } } err = fh_want_write(&cstate->current_fh); if (err) @@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + if (status) return status; - } write->wr_how_written = write->wr_stable_how; @@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); + if (status) goto out; - } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); + if (status) goto out_put_src; - } /* fix up for NFS-specific error code */ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) || @@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &nf, NULL); - if (status != nfs_ok) { - dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); + if (status != nfs_ok) return status; - } status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, fallocate->falloc_offset, @@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + if (status) return status; - } switch (seek->seek_whence) { case NFS4_CONTENT_DATA: -- GitLab From 9fbef7dcd8aa552d5a7e6867eec570e89d7d1631 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:03 -0400 Subject: [PATCH 0554/1418] NFSD: Trace stateids returned via DELEGRETURN [ Upstream commit 20eee313ff4b8a7e71ae9560f5c4ba27cd763005 ] Handing out a delegation stateid is recorded with the nfsd_deleg_read tracepoint, but there isn't a matching tracepoint for recording when the stateid is returned. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 1 + fs/nfsd/trace.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b3f6dda930d8b..6f974fdb47de2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6935,6 +6935,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + trace_nfsd_deleg_return(stateid); wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 132335011ccae..fedf676ef446f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -604,6 +604,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_return); DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, -- GitLab From fae3f8b554fae8631a954e2b205ad84c531ba71b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:09 -0400 Subject: [PATCH 0555/1418] NFSD: Trace delegation revocations [ Upstream commit a1c74569bbde91299f24535abf711be5c84df9de ] Delegation revocation is an exceptional event that is not otherwise visible externally (eg, no network traffic is emitted). Generate a trace record when it occurs so that revocation can be observed or other activity can be triggered. Example: nfsd-1104 [005] 1912.002544: nfsd_stid_revoke: client 633c9343:4e82788d stateid 00000003:00000001 ref=2 type=DELEG Trace infrastructure is provided for subsequent additional tracing related to nfs4_stid activity. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 ++ fs/nfsd/trace.h | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6f974fdb47de2..5d55812f5a2ae 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1367,6 +1367,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + trace_nfsd_stid_revoke(&dp->dl_stid); + if (clp->cl_minorversion) { spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index fedf676ef446f..d261a06b61408 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -637,6 +637,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); +TRACE_DEFINE_ENUM(NFS4_OPEN_STID); +TRACE_DEFINE_ENUM(NFS4_LOCK_STID); +TRACE_DEFINE_ENUM(NFS4_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); +TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); + +#define show_stid_type(x) \ + __print_flags(x, "|", \ + { NFS4_OPEN_STID, "OPEN" }, \ + { NFS4_LOCK_STID, "LOCK" }, \ + { NFS4_DELEG_STID, "DELEG" }, \ + { NFS4_CLOSED_STID, "CLOSED" }, \ + { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ + { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ + { NFS4_LAYOUT_STID, "LAYOUT" }) + +DECLARE_EVENT_CLASS(nfsd_stid_class, + TP_PROTO( + const struct nfs4_stid *stid + ), + TP_ARGS(stid), + TP_STRUCT__entry( + __field(unsigned long, sc_type) + __field(int, sc_count) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + const stateid_t *stp = &stid->sc_stateid; + + __entry->sc_type = stid->sc_type; + __entry->sc_count = refcount_read(&stid->sc_count); + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", + __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation, + __entry->sc_count, show_stid_type(__entry->sc_type) + ) +); + +#define DEFINE_STID_EVENT(name) \ +DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ + TP_PROTO(const struct nfs4_stid *stid), \ + TP_ARGS(stid)) + +DEFINE_STID_EVENT(revoke); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), -- GitLab From 255ac53d78d562fe27b486360699dcbeb0bfacf8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:16 -0400 Subject: [PATCH 0556/1418] NFSD: Use const pointers as parameters to fh_ helpers [ Upstream commit b48f8056c034f28dd54668399f1d22be421b0bef ] Enable callers to use const pointers where they are able to. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsfh.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index c3ae6414fc5cf..513e028b0bbee 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, struct svc_fh *src) +fh_copy(struct svc_fh *dst, const struct svc_fh *src) { WARN_ON(src->fh_dentry); @@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src) } static inline void -fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) { dst->fh_size = src->fh_size; memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); @@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize) return fhp; } -static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; @@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) return true; } -static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_fsid_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; -- GitLab From 6ee5c4e269a9136da48df4126c4dde9b899d35cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:22 -0400 Subject: [PATCH 0557/1418] NFSD: Update file_hashtbl() helpers [ Upstream commit 3fe828caddd81e68e9d29353c6e9285a658ca056 ] Enable callers to use const pointers for type safety. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5d55812f5a2ae..b6e7db0f6a694 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -721,7 +721,7 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static unsigned int file_hashval(struct svc_fh *fh) +static unsigned int file_hashval(const struct svc_fh *fh) { struct inode *inode = d_inode(fh->fh_dentry); @@ -4687,7 +4687,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) /* search file_hashtbl[] for file */ static struct nfs4_file * -find_file_locked(struct svc_fh *fh, unsigned int hashval) +find_file_locked(const struct svc_fh *fh, unsigned int hashval) { struct nfs4_file *fp; -- GitLab From c8d8876aae34f2609d3ea815106024645fa85112 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:28 -0400 Subject: [PATCH 0558/1418] NFSD: Clean up nfsd4_init_file() [ Upstream commit 81a21fa3e7fdecb3c5b97014f0fc5a17d5806cae ] Name this function more consistently. I'm going to use nfsd4_file_ and nfsd4_file_hash_ for these helpers. Change the @fh parameter to be const pointer for better type safety. Finally, move the hash insertion operation to the caller. This is typical for most other "init_object" type helpers, and it is where most of the other nfs4_file hash table operations are located. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b6e7db0f6a694..80e11da952829 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4278,11 +4278,9 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, - struct nfs4_file *fp) -{ - lockdep_assert_held(&state_lock); +static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) +{ refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); @@ -4300,7 +4298,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif - hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } void @@ -4718,7 +4715,8 @@ static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, fp->fi_aliased = alias_found = true; } if (likely(ret == NULL)) { - nfsd4_init_file(fh, hashval, new); + nfsd4_file_init(fh, new); + hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); new->fi_aliased = alias_found; ret = new; } -- GitLab From 5aa0c564c017a008b3d971a6228cfae171695f57 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:34 -0400 Subject: [PATCH 0559/1418] NFSD: Add a nfsd4_file_hash_remove() helper [ Upstream commit 3341678f2fd6106055cead09e513fad6950a0d19 ] Refactor to relocate hash deletion operation to a helper function that is close to most other nfs4_file data structure operations. The "noinline" annotation will become useful in a moment when the hlist_del_rcu() is replaced with a more complex rhash remove operation. It also guarantees that hash remove operations can be traced with "-p function -l remove_nfs4_file_locked". This also simplifies the organization of forward declarations: the to-be-added rhashtable and its param structure will be defined /after/ put_nfs4_file(). Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 80e11da952829..8e26edbe54a33 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -84,6 +84,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +static void nfsd4_file_hash_remove(struct nfs4_file *fi); /* Locking: */ @@ -591,7 +592,7 @@ put_nfs4_file(struct nfs4_file *fi) might_lock(&state_lock); if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { - hlist_del_rcu(&fi->fi_hash); + nfsd4_file_hash_remove(fi); spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); @@ -4750,6 +4751,11 @@ find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) return insert_file(new, fh, hashval); } +static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) +{ + hlist_del_rcu(&fi->fi_hash); +} + /* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid -- GitLab From 0d4150f5eb20b2f14153474af7ca3a26814850a8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:41 -0400 Subject: [PATCH 0560/1418] NFSD: Clean up find_or_add_file() [ Upstream commit 9270fc514ba7d415636b23bcb937573a1ce54f6a ] Remove the call to find_file_locked() in insert_nfs4_file(). Tracing shows that over 99% of these calls return NULL. Thus it is not worth the expense of the extra bucket list traversal. insert_file() already deals correctly with the case where the item is already in the hash bucket. Since nfsd4_file_hash_insert() is now just a wrapper around insert_file(), move the meat of insert_file() into nfsd4_file_hash_insert() and get rid of it. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 64 ++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8e26edbe54a33..da6a7574ac558 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4699,24 +4699,42 @@ find_file_locked(const struct svc_fh *fh, unsigned int hashval) return NULL; } -static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, - unsigned int hashval) +static struct nfs4_file * find_file(struct svc_fh *fh) { struct nfs4_file *fp; + unsigned int hashval = file_hashval(fh); + + rcu_read_lock(); + fp = find_file_locked(fh, hashval); + rcu_read_unlock(); + return fp; +} + +/* + * On hash insertion, identify entries with the same inode but + * distinct filehandles. They will all be in the same hash bucket + * because nfs4_file's are hashed by the address in the fi_inode + * field. + */ +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) +{ + unsigned int hashval = file_hashval(fhp); struct nfs4_file *ret = NULL; bool alias_found = false; + struct nfs4_file *fi; spin_lock(&state_lock); - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, + hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - ret = fp; - } else if (d_inode(fh->fh_dentry) == fp->fi_inode) - fp->fi_aliased = alias_found = true; + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) + ret = fi; + } else if (d_inode(fhp->fh_dentry) == fi->fi_inode) + fi->fi_aliased = alias_found = true; } if (likely(ret == NULL)) { - nfsd4_file_init(fh, new); + nfsd4_file_init(fhp, new); hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); new->fi_aliased = alias_found; ret = new; @@ -4725,32 +4743,6 @@ static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, return ret; } -static struct nfs4_file * find_file(struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - return fp; -} - -static struct nfs4_file * -find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - if (fp) - return fp; - - return insert_file(new, fh, hashval); -} - static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { hlist_del_rcu(&fi->fi_hash); @@ -5661,7 +5653,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(open->op_file, current_fh); + fp = nfsd4_file_hash_insert(open->op_file, current_fh); if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) -- GitLab From 49e8d9f465006ba7197cbcc6d297528b72a2f196 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:47 -0400 Subject: [PATCH 0561/1418] NFSD: Refactor find_file() [ Upstream commit 15424748001a9b5ea62b3e6ad45f0a8b27f01df9 ] find_file() is now the only caller of find_file_locked(), so just fold these two together. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index da6a7574ac558..78b8f1442e005 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4683,31 +4683,24 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) nfs4_put_stid(&last->st_stid); } -/* search file_hashtbl[] for file */ -static struct nfs4_file * -find_file_locked(const struct svc_fh *fh, unsigned int hashval) +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - struct nfs4_file *fp; + unsigned int hashval = file_hashval(fhp); + struct nfs4_file *fi; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - return fp; + rcu_read_lock(); + hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, + lockdep_is_held(&state_lock)) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) { + rcu_read_unlock(); + return fi; + } } } - return NULL; -} - -static struct nfs4_file * find_file(struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); rcu_read_unlock(); - return fp; + return NULL; } /* @@ -4758,9 +4751,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(current_fh); + fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret; + /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) -- GitLab From 5a1f61516f802d95959944e7529d23dfa6868031 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:53 -0400 Subject: [PATCH 0562/1418] NFSD: Use rhashtable for managing nfs4_file objects [ Upstream commit d47b295e8d76a4d69f0e2ea0cd8a79c9d3488280 ] fh_match() is costly, especially when filehandles are large (as is the case for NFSv4). It needs to be used sparingly when searching data structures. Unfortunately, with common workloads, I see multiple thousands of objects stored in file_hashtbl[], which has just 256 buckets, making its bucket hash chains quite lengthy. Walking long hash chains with the state_lock held blocks other activity that needs that lock. Sizable hash chains are a common occurrance once the server has handed out some delegations, for example -- IIUC, each delegated file is held open on the server by an nfs4_file object. To help mitigate the cost of searching with fh_match(), replace the nfs4_file hash table with an rhashtable, which can dynamically resize its bucket array to minimize hash chain length. The result of this modification is an improvement in the latency of NFSv4 operations, and the reduction of nfsd CPU utilization due to eliminating the cost of multiple calls to fh_match() and reducing the CPU cache misses incurred while walking long hash chains in the nfs4_file hash table. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 97 +++++++++++++++++++++++++++++---------------- fs/nfsd/state.h | 5 +-- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 78b8f1442e005..64a61c3d8c609 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,7 +44,9 @@ #include #include #include +#include #include + #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -589,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) void put_nfs4_file(struct nfs4_file *fi) { - might_lock(&state_lock); - - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { + if (refcount_dec_and_test(&fi->fi_ref)) { nfsd4_file_hash_remove(fi); - spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); @@ -718,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -/* hash table for nfs4_file */ -#define FILE_HASH_BITS 8 -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) - -static unsigned int file_hashval(const struct svc_fh *fh) -{ - struct inode *inode = d_inode(fh->fh_dentry); +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; - /* XXX: why not (here & in file cache) use inode? */ - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); -} +static const struct rhashtable_params nfs4_file_rhash_params = { + .key_len = sizeof_field(struct nfs4_file, fi_inode), + .key_offset = offsetof(struct nfs4_file, fi_inode), + .head_offset = offsetof(struct nfs4_file, fi_rlist), -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, + .automatic_shrinking = true, +}; /* * Check if courtesy clients have conflicting access and resolve it if possible @@ -4686,12 +4686,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) static noinline_for_stack struct nfs4_file * nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - unsigned int hashval = file_hashval(fhp); + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *fi; rcu_read_lock(); - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) { rcu_read_unlock(); @@ -4705,40 +4707,56 @@ nfsd4_file_hash_lookup(const struct svc_fh *fhp) /* * On hash insertion, identify entries with the same inode but - * distinct filehandles. They will all be in the same hash bucket - * because nfs4_file's are hashed by the address in the fi_inode - * field. + * distinct filehandles. They will all be on the list returned + * by rhltable_lookup(). + * + * inode->i_lock prevents racing insertions from adding an entry + * for the same inode/fhp pair twice. */ static noinline_for_stack struct nfs4_file * nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) { - unsigned int hashval = file_hashval(fhp); + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; struct nfs4_file *fi; + int err; - spin_lock(&state_lock); - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { + rcu_read_lock(); + spin_lock(&inode->i_lock); + + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) ret = fi; - } else if (d_inode(fhp->fh_dentry) == fi->fi_inode) + } else fi->fi_aliased = alias_found = true; } - if (likely(ret == NULL)) { - nfsd4_file_init(fhp, new); - hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); - new->fi_aliased = alias_found; - ret = new; - } - spin_unlock(&state_lock); + if (ret) + goto out_unlock; + + nfsd4_file_init(fhp, new); + err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, + nfs4_file_rhash_params); + if (err) + goto out_unlock; + + new->fi_aliased = alias_found; + ret = new; + +out_unlock: + spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ret; } static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { - hlist_del_rcu(&fi->fi_hash); + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, + nfs4_file_rhash_params); } /* @@ -5648,6 +5666,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * If not found, create the nfs4_file struct */ fp = nfsd4_file_hash_insert(open->op_file, current_fh); + if (unlikely(!fp)) + return nfserr_jukebox; if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -8064,10 +8084,16 @@ nfs4_state_start(void) { int ret; - ret = nfsd4_create_callback_queue(); + ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) return ret; + ret = nfsd4_create_callback_queue(); + if (ret) { + rhltable_destroy(&nfs4_file_rhltable); + return ret; + } + set_max_delegations(); return 0; } @@ -8098,6 +8124,7 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); + rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e2daef3cc0034..eadd7f465bf52 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -536,16 +536,13 @@ struct nfs4_clnt_odstate { * inode can have multiple filehandles associated with it, so there is * (potentially) a many to one relationship between this struct and struct * inode. - * - * These are hashed by filehandle in the file_hashtbl, which is protected by - * the global state_lock spinlock. */ struct nfs4_file { refcount_t fi_ref; struct inode * fi_inode; bool fi_aliased; spinlock_t fi_lock; - struct hlist_node fi_hash; /* hash on fi_fhandle */ + struct rhlist_head fi_rlist; struct list_head fi_stateids; union { struct list_head fi_delegations; -- GitLab From 6b12589f610ae5ca924573315b4cf3afd593cb09 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 31 Oct 2022 09:53:26 -0400 Subject: [PATCH 0563/1418] NFSD: Fix licensing header in filecache.c [ Upstream commit 3f054211b29c0fa06dfdcab402c795fd7e906be1 ] Add a missing SPDX header. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/filecache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 24ed511ed0d38..e37ecd8b8197e 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Open file cache. + * The NFSD open file cache. * * (c) 2015 - Jeff Layton * -- GitLab From 1f76cb66ff2257675666172aba42b4e661809a20 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Nov 2022 09:02:30 -0500 Subject: [PATCH 0564/1418] filelock: add a new locks_inode_context accessor function [ Upstream commit 401a8b8fd5acd51582b15238d72a8d0edd580e9f ] There are a number of places in the kernel that are accessing the inode->i_flctx field without smp_load_acquire. This is required to ensure that the caller doesn't see a partially-initialized structure. Add a new accessor function for it to make this clear and convert all of the relevant accesses in locks.c to use it. Also, convert locks_free_lock_context to use the helper as well instead of just doing a "bare" assignment. Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Stable-dep-of: 77c67530e1f9 ("nfsd: use locks_inode_context helper") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 24 ++++++++++++------------ include/linux/fs.h | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 1047ab2b15e96..7d0918b8fe5d6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) void *owner; void (*func)(void); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; @@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); + ctx = locks_inode_context(locks_inode(filp)); if (!ctx) return; @@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode) struct file_lock_context *ctx; bool ret; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return false; @@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return; diff --git a/include/linux/fs.h b/include/linux/fs.h index 67313881f8ac1..092d8fa10153f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1189,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); extern bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1334,6 +1341,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, { return false; } + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) -- GitLab From c66f9f22e6e555edd575d471c6a309466651a2f5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Nov 2022 09:19:43 -0500 Subject: [PATCH 0565/1418] lockd: use locks_inode_context helper [ Upstream commit 98b41ffe0afdfeaa1439a5d6bd2db4a94277e31b ] lockd currently doesn't access i_flctx safely. This requires a smp_load_acquire, as the pointer is set via cmpxchg (a release operation). Cc: Trond Myklebust Cc: Anna Schumaker Cc: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svcsubs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 3515f17eaf3fb..e3b6229e7ae5c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; -- GitLab From e017486dadf9ebb890bdd654b67014a9aeaa41c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Nov 2022 09:36:07 -0500 Subject: [PATCH 0566/1418] nfsd: use locks_inode_context helper [ Upstream commit 77c67530e1f95ac25c7075635f32f04367380894 ] nfsd currently doesn't access i_flctx safely everywhere. This requires a smp_load_acquire, as the pointer is set via cmpxchg (a release operation). Acked-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 64a61c3d8c609..5d6851fd4f972 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4784,7 +4784,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) static bool nfsd4_deleg_present(const struct inode *inode) { - struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + struct file_lock_context *ctx = locks_inode_context(inode); return ctx && !list_empty_careful(&ctx->flc_lease); } @@ -5944,7 +5944,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { nf = stp->st_stid.sc_file; - ctx = nf->fi_inode->i_flctx; + ctx = locks_inode_context(nf->fi_inode); if (!ctx) continue; if (locks_owner_has_blockers(ctx, lo)) @@ -7761,7 +7761,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } inode = locks_inode(nf->nf_file); - flctx = inode->i_flctx; + flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); -- GitLab From 8b7be6ef588e0df6036e99f0f637fdac641da396 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Nov 2022 14:44:50 -0400 Subject: [PATCH 0567/1418] nfsd: fix up the filecache laundrette scheduling [ Upstream commit 22ae4c114f77b55a4c5036e8f70409a0799a08f8 ] We don't really care whether there are hashed entries when it comes to scheduling the laundrette. They might all be non-gc entries, after all. We only want to schedule it if there are entries on the LRU. Switch to using list_lru_count, and move the check into nfsd_file_gc_worker. The other callsite in nfsd_file_put doesn't need to count entries, since it only schedules the laundrette after adding an entry to the LRU. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/filecache.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e37ecd8b8197e..697acf5c3c681 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -211,12 +211,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = { static void nfsd_file_schedule_laundrette(void) { - if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || - test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) - return; - - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, - NFSD_LAUNDRETTE_DELAY); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -614,7 +611,8 @@ static void nfsd_file_gc_worker(struct work_struct *work) { nfsd_file_gc(); - nfsd_file_schedule_laundrette(); + if (list_lru_count(&nfsd_file_lru)) + nfsd_file_schedule_laundrette(); } static unsigned long -- GitLab From 12e63680a76cad3bf505669b753560582ccadfcb Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 11 Nov 2022 17:18:35 +0800 Subject: [PATCH 0568/1418] NFSD: Use struct_size() helper in alloc_session() [ Upstream commit 85a0d0c9a58002ef7d1bf5e3ea630f4fbd42a4f0 ] Use struct_size() helper to simplify the code, no functional changes. Signed-off-by: Xiu Jianfeng Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5d6851fd4f972..d2ed047dfdc89 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1834,13 +1834,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; - int mem, i; + int i; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) - + sizeof(struct nfsd4_session) > PAGE_SIZE); - mem = numslots * sizeof(struct nfsd4_slot *); + BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) + > PAGE_SIZE); - new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); if (!new) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ -- GitLab From 8973a8f9b72dbafbd1083c220d975a0e7ec871d0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 14:36:36 -0500 Subject: [PATCH 0569/1418] lockd: set missing fl_flags field when retrieving args [ Upstream commit 75c7940d2a86d3f1b60a0a265478cb8fc887b970 ] Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svc4proc.c | 1 + fs/lockd/svcproc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 284b019cb6529..b72023a6b4c16 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, *filp = file; /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_start = (loff_t)lock->lock_start; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index e35c05e278061..32784f508c810 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ mode = lock_to_openmode(&lock->fl); + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; -- GitLab From ccbf6efab8d37e3af007e83d7e7797f0ab2f3064 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 14:36:37 -0500 Subject: [PATCH 0570/1418] lockd: ensure we use the correct file descriptor when unlocking [ Upstream commit 69efce009f7df888e1fede3cb2913690eb829f52 ] Shared locks are set on O_RDONLY descriptors and exclusive locks are set on O_WRONLY ones. nlmsvc_unlock however calls vfs_lock_file twice, once for each descriptor, but it doesn't reset fl_file. Ensure that it does. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9c1aa75441e1c..9eae99e08e699 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; - if (file->f_file[O_RDONLY]) - error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_RDONLY]; + if (lock->fl.fl_file) + error = vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); - if (file->f_file[O_WRONLY]) - error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_WRONLY]; + if (lock->fl.fl_file) + error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; -- GitLab From 0920deeec6dd2e8d142a688a81744702895d46c6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 14:36:38 -0500 Subject: [PATCH 0571/1418] lockd: fix file selection in nlmsvc_cancel_blocked [ Upstream commit 9f27783b4dd235ef3c8dbf69fc6322777450323c ] We currently do a lock_to_openmode call based on the arguments from the NLM_UNLOCK call, but that will always set the fl_type of the lock to F_UNLCK, and the O_RDONLY descriptor is always chosen. Fix it to use the file_lock from the block instead. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9eae99e08e699..4e30f3c509701 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -699,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); if (block != NULL) { - mode = lock_to_openmode(&lock->fl); - vfs_cancel_lock(block->b_file->f_file[mode], - &block->b_call->a_args.lock.fl); + struct file_lock *fl = &block->b_call->a_args.lock.fl; + + mode = lock_to_openmode(fl); + vfs_cancel_lock(block->b_file->f_file[mode], fl); status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } -- GitLab From 371e1c1b326b5de0a12204f217709e2f626c7fe7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Nov 2022 08:57:43 -0500 Subject: [PATCH 0572/1418] trace: Relocate event helper files [ Upstream commit 247c01ff5f8d66e62a404c91733be52fecb8b7f6 ] Steven Rostedt says: > The include/trace/events/ directory should only hold files that > are to create events, not headers that hold helper functions. > > Can you please move them out of include/trace/events/ as that > directory is "special" in the creation of events. Signed-off-by: Chuck Lever Acked-by: Leon Romanovsky Acked-by: Steven Rostedt (Google) Acked-by: Anna Schumaker Stable-dep-of: 638593be55c0 ("NFSD: add CB_RECALL_ANY tracepoints") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 7 +++++++ drivers/infiniband/core/cm_trace.h | 2 +- drivers/infiniband/core/cma_trace.h | 2 +- fs/nfs/nfs4trace.h | 6 +++--- fs/nfs/nfstrace.h | 6 +++--- include/trace/events/rpcgss.h | 2 +- include/trace/events/rpcrdma.h | 4 ++-- include/trace/events/sunrpc.h | 2 +- include/trace/{events => misc}/fs.h | 0 include/trace/{events => misc}/nfs.h | 0 include/trace/{events => misc}/rdma.h | 0 include/trace/{events/sunrpc_base.h => misc/sunrpc.h} | 0 12 files changed, 19 insertions(+), 12 deletions(-) rename include/trace/{events => misc}/fs.h (100%) rename include/trace/{events => misc}/nfs.h (100%) rename include/trace/{events => misc}/rdma.h (100%) rename include/trace/{events/sunrpc_base.h => misc/sunrpc.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 13d1078808bb5..bbfedb0b20938 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10051,6 +10051,7 @@ F: drivers/infiniband/ F: include/rdma/ F: include/trace/events/ib_mad.h F: include/trace/events/ib_umad.h +F: include/trace/misc/rdma.h F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: samples/bpf/ibumad_kern.c @@ -11139,6 +11140,12 @@ F: fs/nfs_common/ F: fs/nfsd/ F: include/linux/lockd/ F: include/linux/sunrpc/ +F: include/trace/events/rpcgss.h +F: include/trace/events/rpcrdma.h +F: include/trace/events/sunrpc.h +F: include/trace/misc/fs.h +F: include/trace/misc/nfs.h +F: include/trace/misc/sunrpc.h F: include/uapi/linux/nfsd/ F: include/uapi/linux/sunrpc/ F: net/sunrpc/ diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h index e9d282679ef15..944d9071245d2 100644 --- a/drivers/infiniband/core/cm_trace.h +++ b/drivers/infiniband/core/cm_trace.h @@ -16,7 +16,7 @@ #include #include -#include +#include /* * enum ib_cm_state, from include/rdma/ib_cm.h diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index e45264267bcc9..47f3c6e4be893 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -15,7 +15,7 @@ #define _TRACE_RDMA_CMA_H #include -#include +#include DECLARE_EVENT_CLASS(cma_fsm_class, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 3fa77ad7258f2..c8a57cfde64b4 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,10 +9,10 @@ #define _TRACE_NFS4_H #include -#include +#include -#include -#include +#include +#include #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8c6cc58679ff8..642f6921852fa 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,9 +11,9 @@ #include #include -#include -#include -#include +#include +#include +#include #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index c9048f3e471bb..3f121eed369e8 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -13,7 +13,7 @@ #include -#include +#include /** ** GSS-API related trace events diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fcd3b3f1020a6..8f461e04e5f09 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include /** ** Event classes diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f48f2ab9d238b..ffe2679a13ced 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,7 +14,7 @@ #include #include -#include +#include TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); diff --git a/include/trace/events/fs.h b/include/trace/misc/fs.h similarity index 100% rename from include/trace/events/fs.h rename to include/trace/misc/fs.h diff --git a/include/trace/events/nfs.h b/include/trace/misc/nfs.h similarity index 100% rename from include/trace/events/nfs.h rename to include/trace/misc/nfs.h diff --git a/include/trace/events/rdma.h b/include/trace/misc/rdma.h similarity index 100% rename from include/trace/events/rdma.h rename to include/trace/misc/rdma.h diff --git a/include/trace/events/sunrpc_base.h b/include/trace/misc/sunrpc.h similarity index 100% rename from include/trace/events/sunrpc_base.h rename to include/trace/misc/sunrpc.h -- GitLab From 4481d72a4b63eb190e71e381050aa2959226e13b Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:45 -0800 Subject: [PATCH 0573/1418] NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker [ Upstream commit a1049eb47f20b9eabf9afb218578fff16b4baca6 ] Refactoring courtesy_client_reaper to generic low memory shrinker so it can be used for other purposes. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d2ed047dfdc89..df3d421025aa8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4362,7 +4362,7 @@ out: } static unsigned long -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { int cnt; struct nfsd_net *nn = container_of(shrink, @@ -4375,7 +4375,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) } static unsigned long -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { return SHRINK_STOP; } @@ -4402,8 +4402,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); } @@ -6171,17 +6171,24 @@ laundromat_main(struct work_struct *laundry) } static void -courtesy_client_reaper(struct work_struct *reaper) +courtesy_client_reaper(struct nfsd_net *nn) { struct list_head reaplist; - struct delayed_work *dwork = to_delayed_work(reaper); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, - nfsd_shrinker_work); nfs4_get_courtesy_client_reaplist(nn, &reaplist); nfs4_process_client_reaplist(&reaplist); } +static void +nfsd4_state_shrinker_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + nfsd_shrinker_work); + + courtesy_client_reaper(nn); +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -8007,7 +8014,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); return 0; -- GitLab From f30f07ba5789ef5c68c2352b996d8a98fefca8a2 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:46 -0800 Subject: [PATCH 0574/1418] NFSD: add support for sending CB_RECALL_ANY [ Upstream commit 3959066b697b5dfbb7141124ae9665337d4bc638 ] Add XDR encode and decode function for CB_RECALL_ANY. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4callback.c | 72 ++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 5 +++ fs/nfsd/xdr4cb.h | 6 ++++ 4 files changed, 84 insertions(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 39989c14c8a1e..4eae2c5af2edf 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p) * 1 Protocol" */ +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); +} + +static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, + size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); +} + /* * nfs_cb_opnum4 * @@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, hdr->nops++; } +/* + * CB_RECALLANY4args + * + * struct CB_RECALLANY4args { + * uint32_t craa_objects_to_keep; + * bitmap4 craa_type_mask; + * }; + */ +static void +encode_cb_recallany4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) +{ + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); + encode_uint32(xdr, ra->ra_keep); + encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); + hdr->nops++; +} + /* * CB_SEQUENCE4args * @@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static void +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfsd4_cb_recall_any *ra; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recallany4args(xdr, &hdr, ra); + encode_cb_nops(&hdr); +} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static int +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); + return status; +} + #ifdef CONFIG_NFSD_PNFS /* * CB_LAYOUTRECALL4args @@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index eadd7f465bf52..e30882f8b8516 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -636,6 +636,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, + NFSPROC4_CLNT_CB_RECALL_ANY, }; /* Returns true iff a is later than b: */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 36c3340c1d54a..510978e602da6 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -896,5 +896,10 @@ struct nfsd4_operation { union nfsd4_op_u *); }; +struct nfsd4_cb_recall_any { + struct nfsd4_callback ra_cb; + u32 ra_keep; + u32 ra_bmval[1]; +}; #endif diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 547cf07cf4e08..0d39af1b00a0f 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -48,3 +48,9 @@ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + 1 + 1) +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) -- GitLab From f28dae54632c5ea45f32ddc6fba494f5efc15007 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:47 -0800 Subject: [PATCH 0575/1418] NFSD: add delegation reaper to react to low memory condition [ Upstream commit 44df6f439a1790a5f602e3842879efa88f346672 ] The delegation reaper is called by nfsd memory shrinker's on the 'count' callback. It scans the client list and sends the courtesy CB_RECALL_ANY to the clients that hold delegations. To avoid flooding the clients with CB_RECALL_ANY requests, the delegation reaper sends only one CB_RECALL_ANY request to each client per 5 seconds. Signed-off-by: Dai Ngo [ cel: moved definition of RCA4_TYPE_MASK_RDATA_DLG ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 88 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfsd/state.h | 5 +++ include/linux/nfs4.h | 13 +++++++ 3 files changed, 102 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index df3d421025aa8..1e030e0309304 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2145,6 +2145,7 @@ static void __free_client(struct kref *k) kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); + kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } @@ -2872,6 +2873,36 @@ static const struct tree_descr client_files[] = { [3] = {""}, }; +static int +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + put_client_renew_locked(clp); + spin_unlock(&nn->client_lock); +} + +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { + .done = nfsd4_cb_recall_any_done, + .release = nfsd4_cb_recall_any_release, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2909,6 +2940,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); + if (!clp->cl_ra) { + free_client(clp); + return NULL; + } + clp->cl_ra_time = 0; + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, + NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } @@ -4364,14 +4403,16 @@ out: static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int cnt; + int count; struct nfsd_net *nn = container_of(shrink, struct nfsd_net, nfsd_client_shrinker); - cnt = atomic_read(&nn->nfsd_courtesy_clients); - if (cnt > 0) + count = atomic_read(&nn->nfsd_courtesy_clients); + if (!count) + count = atomic_long_read(&num_delegations); + if (count) mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); - return (unsigned long)cnt; + return (unsigned long)count; } static unsigned long @@ -6179,6 +6220,44 @@ courtesy_client_reaper(struct nfsd_net *nn) nfs4_process_client_reaplist(&reaplist); } +static void +deleg_reaper(struct nfsd_net *nn) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + struct list_head cblist; + + INIT_LIST_HEAD(&cblist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state != NFSD4_ACTIVE || + list_empty(&clp->cl_delegations) || + atomic_read(&clp->cl_delegs_in_recall) || + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || + (ktime_get_boottime_seconds() - + clp->cl_ra_time < 5)) { + continue; + } + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ + atomic_inc(&clp->cl_rpc_users); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&cblist)) { + clp = list_first_entry(&cblist, struct nfs4_client, + cl_ra_cblist); + list_del_init(&clp->cl_ra_cblist); + clp->cl_ra->ra_keep = 0; + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + nfsd4_run_cb(&clp->cl_ra->ra_cb); + } +} + static void nfsd4_state_shrinker_worker(struct work_struct *work) { @@ -6187,6 +6266,7 @@ nfsd4_state_shrinker_worker(struct work_struct *work) nfsd_shrinker_work); courtesy_client_reaper(nn); + deleg_reaper(nn); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e30882f8b8516..e94634d305912 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -368,6 +368,7 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) +#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; @@ -411,6 +412,10 @@ struct nfs4_client { unsigned int cl_state; atomic_t cl_delegs_in_recall; + + struct nfsd4_cb_recall_any *cl_ra; + time64_t cl_ra_time; + struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d04b6a5964c4..730003c4f4af4 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -732,4 +732,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif -- GitLab From 7b2b8a6c75f0c0175f626d61a74e4f7f75d38df4 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:48 -0800 Subject: [PATCH 0576/1418] NFSD: add CB_RECALL_ANY tracepoints [ Upstream commit 638593be55c0b37a1930038460a9918215d5c24b ] Add tracepoints to trace start and end of CB_RECALL_ANY operation. Signed-off-by: Dai Ngo [ cel: added show_rca_mask() macro ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 ++ fs/nfsd/trace.h | 50 ++++++++++++++++++++++++++++++++++++++++ include/trace/misc/nfs.h | 12 ++++++++++ 3 files changed, 64 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1e030e0309304..d8829fa53fdaa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2877,6 +2877,7 @@ static int nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, struct rpc_task *task) { + trace_nfsd_cb_recall_any_done(cb, task); switch (task->tk_status) { case -NFS4ERR_DELAY: rpc_delay(task, 2 * HZ); @@ -6254,6 +6255,7 @@ deleg_reaper(struct nfsd_net *nn) list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d261a06b61408..4183819ea0829 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,9 +9,12 @@ #define _NFSD_TRACE_H #include +#include +#include #include "export.h" #include "nfsfh.h" +#include "xdr4.h" #define NFSD_TRACE_PROC_RES_FIELDS \ __field(unsigned int, netns_ino) \ @@ -1492,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash, __entry->count, __entry->status) ); +TRACE_EVENT(nfsd_cb_recall_any, + TP_PROTO( + const struct nfsd4_cb_recall_any *ra + ), + TP_ARGS(ra), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, keep) + __field(unsigned long, bmval0) + __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot; + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id; + __entry->keep = ra->ra_keep; + __entry->bmval0 = ra->ra_bmval[0]; + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr, + ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen); + ), + TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->keep, show_rca_mask(__entry->bmval0) + ) +); + DECLARE_EVENT_CLASS(nfsd_cb_done_class, TP_PROTO( const stateid_t *stp, @@ -1531,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); +TRACE_EVENT(nfsd_cb_recall_any_done, + TP_PROTO( + const struct nfsd4_callback *cb, + const struct rpc_task *task + ), + TP_ARGS(cb, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + ), + TP_fast_assign( + __entry->status = task->tk_status; + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot; + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->status + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 09ffdbb04134d..0d9d48dca38a8 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -360,6 +360,18 @@ TRACE_DEFINE_ENUM(IOMODE_ANY); { IOMODE_RW, "RW" }, \ { IOMODE_ANY, "ANY" }) +#define show_rca_mask(x) \ + __print_flags(x, "|", \ + { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \ + { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" }) + #define show_nfs4_seq4_status(x) \ __print_flags(x, "|", \ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ -- GitLab From eb73733124305ce47d86d74fc3610ea7a4e55260 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 26 Nov 2022 15:55:30 -0500 Subject: [PATCH 0577/1418] NFSD: Use only RQ_DROPME to signal the need to drop a reply [ Upstream commit 9315564747cb6a570e99196b3a4880fb817635fd ] Clean up: NFSv2 has the only two usages of rpc_drop_reply in the NFSD code base. Since NFSv2 is going away at some point, replace these in order to simplify the "drop this reply?" check in nfsd_dispatch(). Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsproc.c | 4 ++-- fs/nfsd/nfssvc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 52fc222c34f26..a5570cf75f3fd 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + __set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + __set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6f4a38f5ab0ce..0c75636054a54 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1071,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); - if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) + if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) -- GitLab From 5c6c2fb3c12f7d7bb7f04259878ac965a8ea2d2d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 12:48:59 -0800 Subject: [PATCH 0578/1418] NFSD: Avoid clashing function prototypes [ Upstream commit e78e274eb22d966258a3845acc71d3c5b8ee2ea8 ] When built with Control Flow Integrity, function prototypes between caller and function declaration must match. These mismatches are visible at compile time with the new -Wcast-function-type-strict in Clang[1]. There were 97 warnings produced by NFS. For example: fs/nfsd/nfs4xdr.c:2228:17: warning: cast from '__be32 (*)(struct nfsd4_compoundargs *, struct nfsd4_access *)' (aka 'unsigned int (*)(struct nfsd4_compoundargs *, struct nfsd4_access *)') to 'nfsd4_dec' (aka 'unsigned int (*)(struct nfsd4_compoundargs *, void *)') converts to incompatible function type [-Wcast-function-type-strict] [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The enc/dec callbacks were defined as passing "void *" as the second argument, but were being implicitly cast to a new type. Replace the argument with union nfsd4_op_u, and perform explicit member selection in the function body. There are no resulting binary differences. Changes were made mechanically using the following Coccinelle script, with minor by-hand fixes for members that didn't already match their existing argument name: @find@ identifier func; type T, opsT; identifier ops, N; @@ opsT ops[] = { [N] = (T) func, }; @already_void@ identifier find.func; identifier name; @@ func(..., -void +union nfsd4_op_u *name) { ... } @proto depends on !already_void@ identifier find.func; type T; identifier name; position p; @@ func@p(..., T name ) { ... } @script:python get_member@ type_name << proto.T; member; @@ coccinelle.member = cocci.make_ident(type_name.split("_", 1)[1].split(' ',1)[0]) @convert@ identifier find.func; type proto.T; identifier proto.name; position proto.p; identifier get_member.member; @@ func@p(..., - T name + union nfsd4_op_u *u ) { + T name = &u->member; ... } @cast@ identifier find.func; type T, opsT; identifier ops, N; @@ opsT ops[] = { [N] = - (T) func, }; Cc: Chuck Lever Cc: Jeff Layton Cc: Gustavo A. R. Silva Cc: linux-nfs@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 632 +++++++++++++++++++++++++++------------------- 1 file changed, 377 insertions(+), 255 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 51a598ee68fe1..597f14a80512f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, - struct nfsd4_access *access) + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) return nfserr_bad_xdr; return nfs_ok; } static __be32 -nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) return nfserr_bad_xdr; return nfsd4_decode_stateid4(argp, &close->cl_stateid); @@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) static __be32 -nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) @@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit } static __be32 -nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; __be32 *p, status; memset(create, 0, sizeof(*create)); @@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } static inline __be32 -nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_delegreturn *dr = &u->delegreturn; return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 -nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; memset(getattr, 0, sizeof(*getattr)); return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, ARRAY_SIZE(getattr->ga_bmval)); } static __be32 -nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) +nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; memset(link, 0, sizeof(*link)); return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } @@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; memset(lock, 0, sizeof(*lock)); if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) return nfserr_bad_xdr; @@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; memset(lockt, 0, sizeof(*lockt)); if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) return nfserr_bad_xdr; @@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) } static __be32 -nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; __be32 status; if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) @@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) } static __be32 -nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lookup *lookup = &u->lookup; return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } @@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; __be32 status; u32 dummy; @@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) } static __be32 -nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *open_conf = &u->open_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con } static __be32 -nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *open_down = &u->open_downgrade; __be32 status; memset(open_down, 0, sizeof(*open_down)); @@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d } static __be32 -nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_putfh *putfh = &u->putfh; __be32 *p; if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) @@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { if (argp->minorversion == 0) return nfs_ok; @@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) } static __be32 -nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; __be32 status; memset(read, 0, sizeof(*read)); @@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) } static __be32 -nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; __be32 status; memset(readdir, 0, sizeof(*readdir)); @@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read } static __be32 -nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 -nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; __be32 status; memset(rename, 0, sizeof(*rename)); @@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename } static __be32 -nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + clientid_t *clientid = &u->renew; return nfsd4_decode_clientid4(argp, clientid); } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; secinfo->si_exp = NULL; return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; __be32 status; memset(setattr, 0, sizeof(*setattr)); @@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta } static __be32 -nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setclientid *setclientid = &u->setclientid; __be32 *p, status; memset(setclientid, 0, sizeof(*setclientid)); @@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient } static __be32 -nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s /* Also used for NVERIFY */ static __be32 -nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_verify *verify = &u->verify; __be32 *p, status; memset(verify, 0, sizeof(*verify)); @@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify } static __be32 -nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; __be32 status; status = nfsd4_decode_stateid4(argp, &write->wr_stateid); @@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) } static __be32 -nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; __be32 status; if (argp->minorversion >= 1) @@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel return nfs_ok; } -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; memset(bc, 0, sizeof(*bc)); if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) return nfserr_bad_xdr; return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); } -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; u32 use_conn_in_rdma_mode; __be32 status; @@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; __be32 status; memset(exid, 0, sizeof(*exid)); @@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; __be32 status; memset(sess, 0, sizeof(*sess)); @@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_session *destroy_session) + union nfsd4_op_u *u) { + struct nfsd4_destroy_session *destroy_session = &u->destroy_session; return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - struct nfsd4_free_stateid *free_stateid) + union nfsd4_op_u *u) { + struct nfsd4_free_stateid *free_stateid = &u->free_stateid; return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; __be32 status; memset(gdev, 0, sizeof(*gdev)); @@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; __be32 *p, status; memset(lcp, 0, sizeof(*lcp)); @@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; __be32 status; memset(lgp, 0, sizeof(*lgp)); @@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; memset(lrp, 0, sizeof(*lrp)); if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) return nfserr_bad_xdr; @@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, #endif /* CONFIG_NFSD_PNFS */ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; @@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; __be32 *p, status; status = nfsd4_decode_sessionid4(argp, &seq->sessionid); @@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; __be32 status; u32 i; @@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta } static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_clientid *dc) + union nfsd4_op_u *u) { + struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; return nfsd4_decode_clientid4(argp, &dc->clientid); } static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, - struct nfsd4_reclaim_complete *rc) + union nfsd4_op_u *u) { + struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) return nfserr_bad_xdr; return nfs_ok; @@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - struct nfsd4_fallocate *fallocate) + union nfsd4_op_u *u) { + struct nfsd4_fallocate *fallocate = &u->allocate; __be32 status; status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); @@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; __be32 status; @@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) static __be32 nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; memset(cn, 0, sizeof(*cn)); @@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; os->count = 0; os->status = 0; return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 status; status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); @@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_clone *clone = &u->clone; __be32 status; status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); @@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) */ static __be32 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; __be32 status; u32 maxcount; @@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; u32 flags, maxcount, size; __be32 status; @@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; u32 maxcount; memset(listxattrs, 0, sizeof(*listxattrs)); @@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 -nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfs_ok; } static __be32 -nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfserr_notsupp; } -typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); static const nfsd4_dec nfsd4_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, + [OP_ACCESS] = nfsd4_decode_access, + [OP_CLOSE] = nfsd4_decode_close, + [OP_COMMIT] = nfsd4_decode_commit, + [OP_CREATE] = nfsd4_decode_create, + [OP_DELEGPURGE] = nfsd4_decode_notsupp, + [OP_DELEGRETURN] = nfsd4_decode_delegreturn, + [OP_GETATTR] = nfsd4_decode_getattr, + [OP_GETFH] = nfsd4_decode_noop, + [OP_LINK] = nfsd4_decode_link, + [OP_LOCK] = nfsd4_decode_lock, + [OP_LOCKT] = nfsd4_decode_lockt, + [OP_LOCKU] = nfsd4_decode_locku, + [OP_LOOKUP] = nfsd4_decode_lookup, + [OP_LOOKUPP] = nfsd4_decode_noop, + [OP_NVERIFY] = nfsd4_decode_verify, + [OP_OPEN] = nfsd4_decode_open, + [OP_OPENATTR] = nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, + [OP_PUTFH] = nfsd4_decode_putfh, + [OP_PUTPUBFH] = nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = nfsd4_decode_noop, + [OP_READ] = nfsd4_decode_read, + [OP_READDIR] = nfsd4_decode_readdir, + [OP_READLINK] = nfsd4_decode_noop, + [OP_REMOVE] = nfsd4_decode_remove, + [OP_RENAME] = nfsd4_decode_rename, + [OP_RENEW] = nfsd4_decode_renew, + [OP_RESTOREFH] = nfsd4_decode_noop, + [OP_SAVEFH] = nfsd4_decode_noop, + [OP_SECINFO] = nfsd4_decode_secinfo, + [OP_SETATTR] = nfsd4_decode_setattr, + [OP_SETCLIENTID] = nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = nfsd4_decode_verify, + [OP_WRITE] = nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, - [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_decode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, + [OP_LAYOUTGET] = nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, - [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, + [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_decode_sequence, + [OP_SET_SSV] = nfsd4_decode_notsupp, + [OP_TEST_STATEID] = nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, /* new operations for NFSv4.2 */ - [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, - [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + [OP_ALLOCATE] = nfsd4_decode_fallocate, + [OP_COPY] = nfsd4_decode_copy, + [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, + [OP_DEALLOCATE] = nfsd4_decode_fallocate, + [OP_IO_ADVISE] = nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, + [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, + [OP_READ_PLUS] = nfsd4_decode_read, + [OP_SEEK] = nfsd4_decode_seek, + [OP_WRITE_SAME] = nfsd4_decode_notsupp, + [OP_CLONE] = nfsd4_decode_clone, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, - [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, - [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, + [OP_GETXATTR] = nfsd4_decode_getxattr, + [OP_SETXATTR] = nfsd4_decode_setxattr, + [OP_LISTXATTRS] = nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_decode_removexattr, }; static inline bool @@ -3643,8 +3702,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) } static __be32 -nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3656,8 +3717,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ return 0; } -static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3673,8 +3736,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, } static __be32 -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); @@ -3682,8 +3747,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 -nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3696,8 +3763,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3710,8 +3779,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = resp->xdr; @@ -3720,8 +3791,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct svc_fh **fhpp = &u->getfh; struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; @@ -3775,8 +3848,10 @@ again: } static __be32 -nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; struct xdr_stream *xdr = resp->xdr; if (!nfserr) @@ -3788,8 +3863,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo } static __be32 -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) @@ -3798,8 +3875,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l } static __be32 -nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); @@ -3807,8 +3886,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 -nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3821,8 +3902,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3915,16 +3998,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } static __be32 -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *oc = &u->open_confirm; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *od = &u->open_downgrade; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); @@ -4023,8 +4110,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; @@ -4065,8 +4153,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readlink *readlink = &u->readlink; __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; @@ -4110,8 +4200,10 @@ out_err: } static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; int maxcount; int bytes_left; loff_t offset; @@ -4201,8 +4293,10 @@ err_no_verf: } static __be32 -nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4214,8 +4308,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4297,8 +4393,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); @@ -4306,8 +4403,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo_no_name *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); @@ -4318,8 +4416,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, * regardless of the error status. */ static __be32 -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4342,8 +4442,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4366,8 +4468,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n } static __be32 -nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4383,8 +4487,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; @@ -4461,8 +4566,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4514,8 +4620,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4537,8 +4644,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_test_stateid *test_stateid) + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4558,8 +4666,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; @@ -4611,8 +4720,9 @@ toosmall: static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4638,8 +4748,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4659,8 +4770,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4745,8 +4857,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy *copy) + union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, @@ -4762,8 +4875,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4813,8 +4927,9 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; int starting_len = xdr->buf->len; @@ -4850,8 +4965,9 @@ out: static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4885,8 +5001,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_seek *seek) + union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 *p; p = xdr_reserve_space(resp->xdr, 4 + 8); @@ -4897,7 +5014,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *p) { return nfserr; } @@ -4948,8 +5066,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p, err; @@ -4972,8 +5091,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5013,8 +5133,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; @@ -5124,8 +5245,9 @@ out: static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5137,7 +5259,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); +typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); /* * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 @@ -5145,93 +5267,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * done in the decoding phase. */ static const nfsd4_enc nfsd4_enc_ops[] = { - [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, - [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, - [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, - [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, - [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, - [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, - [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, - [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, - [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, - [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, - [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, - [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, - [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_READ] = (nfsd4_enc)nfsd4_encode_read, - [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, - [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, - [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, - [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, - [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, - [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, - [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, - [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ACCESS] = nfsd4_encode_access, + [OP_CLOSE] = nfsd4_encode_close, + [OP_COMMIT] = nfsd4_encode_commit, + [OP_CREATE] = nfsd4_encode_create, + [OP_DELEGPURGE] = nfsd4_encode_noop, + [OP_DELEGRETURN] = nfsd4_encode_noop, + [OP_GETATTR] = nfsd4_encode_getattr, + [OP_GETFH] = nfsd4_encode_getfh, + [OP_LINK] = nfsd4_encode_link, + [OP_LOCK] = nfsd4_encode_lock, + [OP_LOCKT] = nfsd4_encode_lockt, + [OP_LOCKU] = nfsd4_encode_locku, + [OP_LOOKUP] = nfsd4_encode_noop, + [OP_LOOKUPP] = nfsd4_encode_noop, + [OP_NVERIFY] = nfsd4_encode_noop, + [OP_OPEN] = nfsd4_encode_open, + [OP_OPENATTR] = nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, + [OP_PUTFH] = nfsd4_encode_noop, + [OP_PUTPUBFH] = nfsd4_encode_noop, + [OP_PUTROOTFH] = nfsd4_encode_noop, + [OP_READ] = nfsd4_encode_read, + [OP_READDIR] = nfsd4_encode_readdir, + [OP_READLINK] = nfsd4_encode_readlink, + [OP_REMOVE] = nfsd4_encode_remove, + [OP_RENAME] = nfsd4_encode_rename, + [OP_RENEW] = nfsd4_encode_noop, + [OP_RESTOREFH] = nfsd4_encode_noop, + [OP_SAVEFH] = nfsd4_encode_noop, + [OP_SECINFO] = nfsd4_encode_secinfo, + [OP_SETATTR] = nfsd4_encode_setattr, + [OP_SETCLIENTID] = nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, + [OP_VERIFY] = nfsd4_encode_noop, + [OP_WRITE] = nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, /* NFSv4.1 operations */ - [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = nfsd4_encode_noop, + [OP_FREE_STATEID] = nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_encode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = nfsd4_encode_noop, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, + [OP_LAYOUTGET] = nfsd4_encode_noop, + [OP_LAYOUTRETURN] = nfsd4_encode_noop, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, - [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_encode_sequence, + [OP_SET_SSV] = nfsd4_encode_noop, + [OP_TEST_STATEID] = nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, /* NFSv4.2 operations */ - [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, - [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, - [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ALLOCATE] = nfsd4_encode_noop, + [OP_COPY] = nfsd4_encode_copy, + [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, + [OP_DEALLOCATE] = nfsd4_encode_noop, + [OP_IO_ADVISE] = nfsd4_encode_noop, + [OP_LAYOUTERROR] = nfsd4_encode_noop, + [OP_LAYOUTSTATS] = nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, + [OP_READ_PLUS] = nfsd4_encode_read_plus, + [OP_SEEK] = nfsd4_encode_seek, + [OP_WRITE_SAME] = nfsd4_encode_noop, + [OP_CLONE] = nfsd4_encode_noop, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, - [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, - [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, + [OP_GETXATTR] = nfsd4_encode_getxattr, + [OP_SETXATTR] = nfsd4_encode_setxattr, + [OP_LISTXATTRS] = nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_encode_removexattr, }; /* -- GitLab From ce606d5334c2abd772bac18c5ee83f3dd82f2a11 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 7 Jan 2023 10:15:35 -0500 Subject: [PATCH 0579/1418] NFSD: Use set_bit(RQ_DROPME) [ Upstream commit 5304930dbae82d259bcf7e5611db7c81e7a42eff ] The premise that "Once an svc thread is scheduled and executing an RPC, no other processes will touch svc_rqst::rq_flags" is false. svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd threads when determining which thread to wake up next. Fixes: 9315564747cb ("NFSD: Use only RQ_DROPME to signal the need to drop a reply") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3fd..9744443c39652 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } -- GitLab From c479755cb80a85bbd7569fa7a7e133a66f792a31 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 11 Jan 2023 12:17:09 -0800 Subject: [PATCH 0580/1418] NFSD: register/unregister of nfsd-client shrinker at nfsd startup/shutdown time [ Upstream commit f385f7d244134246f984975ed34cd75f77de479f ] Currently the nfsd-client shrinker is registered and unregistered at the time the nfsd module is loaded and unloaded. The problem with this is the shrinker is being registered before all of the relevant fields in nfsd_net are initialized when nfsd is started. This can lead to an oops when memory is low and the shrinker is called while nfsd is not running. This patch moves the register/unregister of nfsd-client shrinker from module load/unload time to nfsd startup/shutdown time. Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition") Reported-by: Mike Galbraith Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 22 +++++++++++----------- fs/nfsd/nfsctl.c | 7 +------ fs/nfsd/nfsd.h | 6 ++---- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d8829fa53fdaa..02a3629ba307f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4422,7 +4422,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4444,16 +4444,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -8099,8 +8089,17 @@ static int nfs4_state_create_net(struct net *net) INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8193,6 +8192,7 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a8884d0b4638c..76a60e7a75097 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1452,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1464,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1481,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 93b42ef9ed91b..fa0144a742678 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) -- GitLab From f3ea5ec83d1a827f074b2b660749817e0bf2b23e Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 11 Jan 2023 16:06:51 -0800 Subject: [PATCH 0581/1418] NFSD: replace delayed_work with work_struct for nfsd_client_shrinker [ Upstream commit 7c24fa225081f31bc6da6a355c1ba801889ab29a ] Since nfsd4_state_shrinker_count always calls mod_delayed_work with 0 delay, we can replace delayed_work with work_struct to save some space and overhead. Also add the call to cancel_work after unregister the shrinker in nfs4_state_shutdown_net. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4state.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285bb..51a4b7885cae2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -195,7 +195,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 02a3629ba307f..a6fcde4f7b13a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4412,7 +4412,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) if (!count) count = atomic_long_read(&num_delegations); if (count) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } @@ -6253,8 +6253,7 @@ deleg_reaper(struct nfsd_net *nn) static void nfsd4_state_shrinker_worker(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); @@ -8086,7 +8085,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; @@ -8193,6 +8192,7 @@ nfs4_state_shutdown_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); -- GitLab From 56587affe21c5cd806523a89efd8da5b49872a72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Feb 2023 07:50:08 -0500 Subject: [PATCH 0582/1418] nfsd: don't destroy global nfs4_file table in per-net shutdown [ Upstream commit 4102db175b5d884d133270fdbd0e59111ce688fc ] The nfs4_file table is global, so shutting it down when a containerized nfsd is shut down is wrong and can lead to double-frees. Tear down the nfs4_file_rhltable in nfs4_state_shutdown instead of nfs4_state_shutdown_net. Fixes: d47b295e8d76 ("NFSD: Use rhashtable for managing nfs4_file objects") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2169017 Reported-by: JianHong Yin Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a6fcde4f7b13a..b9d694ec25d19 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8212,7 +8212,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif @@ -8222,6 +8221,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void -- GitLab From e58f2862e9fe500b073d20f94e73abc52fb70634 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:39 +0100 Subject: [PATCH 0583/1418] arm64: efi: Limit allocations to 48-bit addressable physical region From: Ard Biesheuvel [ Commit a37dac5c5dcfe0f1fd58513c16cdbc280a47f628 upstream ] The UEFI spec does not mention or reason about the configured size of the virtual address space at all, but it does mention that all memory should be identity mapped using a page size of 4 KiB. This means that a LPA2 capable system that has any system memory outside of the 48-bit addressable physical range and follows the spec to the letter may serve page allocation requests from regions of memory that the kernel cannot access unless it was built with LPA2 support and enables it at runtime. So let's ensure that all page allocations are limited to the 48-bit range. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/efi.h | 1 + drivers/firmware/efi/libstub/alignedmem.c | 2 ++ drivers/firmware/efi/libstub/arm64-stub.c | 5 +++-- drivers/firmware/efi/libstub/efistub.h | 4 ++++ drivers/firmware/efi/libstub/mem.c | 2 ++ drivers/firmware/efi/libstub/randomalloc.c | 2 +- 6 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 62c846be2d76a..a75c0772ecfca 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si) } #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) /* * On ARM systems, virtually remapped UEFI runtime services are set up in two diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 174832661251e..6b83c492c3b82 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -29,6 +29,8 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, efi_status_t status; int slack; + max = min(max, EFI_ALLOC_LIMIT); + if (align < EFI_ALLOC_ALIGN) align = EFI_ALLOC_ALIGN; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 08f46c072da56..40275c3131c8e 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -191,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (status != EFI_SUCCESS) { if (!check_image_region((u64)_text, kernel_memsize)) { efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); - } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { + } else if (IS_ALIGNED((u64)_text, min_kimg_align) && + (u64)_end < EFI_ALLOC_LIMIT) { /* * Just execute from wherever we were loaded by the - * UEFI PE/COFF loader if the alignment is suitable. + * UEFI PE/COFF loader if the placement is suitable. */ *image_addr = (u64)_text; *reserve_size = 0; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index ab505b07e626b..002f02a6d3598 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -29,6 +29,10 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#ifndef EFI_ALLOC_LIMIT +#define EFI_ALLOC_LIMIT ULONG_MAX +#endif + extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 03d147f17185b..4f1fa302234d8 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -89,6 +89,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, efi_physical_addr_t alloc_addr; efi_status_t status; + max = min(max, EFI_ALLOC_LIMIT); + if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) return efi_allocate_pages_aligned(size, addr, max, EFI_ALLOC_ALIGN, diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index ec44bb7e092fa..1692d19ae80f0 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -29,7 +29,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)ULONG_MAX); + (u64)EFI_ALLOC_LIMIT); if (region_end < size) return 0; -- GitLab From 33d064aecd89846d5cf284ab75eeb9098b5ff49e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:40 +0100 Subject: [PATCH 0584/1418] efi: efivars: prevent double registration From: Johan Hovold [ Commit 0217a40d7ba6e71d7f3422fbe89b436e8ee7ece7 upstream ] Add the missing sanity check to efivars_register() so that it is no longer possible to override an already registered set of efivar ops (without first deregistering them). This can help debug initialisation ordering issues where drivers have so far unknowingly been relying on overriding the generic ops. Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/vars.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 0ba9f18312f5b..4ca256bcd6971 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject) { + int rv; + if (down_interruptible(&efivars_lock)) return -EINTR; + if (__efivars) { + pr_warn("efivars already registered\n"); + rv = -EBUSY; + goto out; + } + efivars->ops = ops; efivars->kobject = kobject; __efivars = efivars; pr_info("Registered efivars operations\n"); - + rv = 0; +out: up(&efivars_lock); - return 0; + return rv; } EXPORT_SYMBOL_GPL(efivars_register); -- GitLab From f0acafd6f79fa6068b7fc4af7980ac9bbd14f1d1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:41 +0100 Subject: [PATCH 0585/1418] x86/efistub: Simplify and clean up handover entry code From: Ard Biesheuvel [ Commit df9215f15206c2a81909ccf60f21d170801dce38 upstream ] Now that the EFI entry code in assembler is only used by the optional and deprecated EFI handover protocol, and given that the EFI stub C code no longer returns to it, most of it can simply be dropped. While at it, clarify the symbol naming, by merging efi_main() and efi_stub_entry(), making the latter the shared entry point for all different boot modes that enter via the EFI stub. The efi32_stub_entry() and efi64_stub_entry() names are referenced explicitly by the tooling that populates the setup header, so these must be retained, but can be emitted as aliases of efi_stub_entry() where appropriate. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-5-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/boot.rst | 2 +- arch/x86/boot/compressed/efi_mixed.S | 22 ++++++++++++---------- arch/x86/boot/compressed/head_32.S | 11 ----------- arch/x86/boot/compressed/head_64.S | 12 ++---------- drivers/firmware/efi/libstub/x86-stub.c | 20 ++++++++++++++++---- 5 files changed, 31 insertions(+), 36 deletions(-) diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index 894a198970055..bac3789f3e8fa 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware. The function prototype for the handover entry point looks like this:: - efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) + efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp) 'handle' is the EFI image handle passed to the boot loader by the EFI firmware, 'table' is the EFI system table - these are the first two diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8b02e507d3bb0..d05f0250bbbc2 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -26,8 +26,8 @@ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() * is the first thing that runs after switching to long mode. Depending on * whether the EFI handover protocol or the compat entry point was used to - * enter the kernel, it will either branch to the 64-bit EFI handover - * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF + * enter the kernel, it will either branch to the common 64-bit EFI stub + * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a * struct bootparams pointer as the third argument, so the presence of such a * pointer is used to disambiguate. @@ -37,21 +37,23 @@ * | efi32_pe_entry |---->| | | +-----------+--+ * +------------------+ | | +------+----------------+ | * | startup_32 |---->| startup_64_mixed_mode | | - * +------------------+ | | +------+----------------+ V - * | efi32_stub_entry |---->| | | +------------------+ - * +------------------+ +------------+ +---->| efi64_stub_entry | - * +-------------+----+ - * +------------+ +----------+ | - * | startup_64 |<----| efi_main |<--------------+ - * +------------+ +----------+ + * +------------------+ | | +------+----------------+ | + * | efi32_stub_entry |---->| | | | + * +------------------+ +------------+ | | + * V | + * +------------+ +----------------+ | + * | startup_64 |<----| efi_stub_entry |<--------+ + * +------------+ +----------------+ */ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx - jnz efi64_stub_entry + jnz efi_stub_entry +#endif /* * efi_pe_entry uses MS calling convention, which requires 32 bytes of * shadow space on the stack even if all arguments are passed in diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3ecc1bbe971e1..3af4a383615b3 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -150,17 +150,6 @@ SYM_FUNC_START(startup_32) jmp *%eax SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_STUB -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp - movl 8(%esp), %esi /* save boot_params pointer */ - call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ - jmp *%eax -SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fafd0a59f3961..d4ccae574c4f3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -474,19 +474,11 @@ SYM_CODE_START(startup_64) jmp *%rax SYM_CODE_END(startup_64) -#ifdef CONFIG_EFI_STUB -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) .org 0x390 -#endif SYM_FUNC_START(efi64_stub_entry) - and $~0xf, %rsp /* realign the stack */ - movq %rdx, %rbx /* save boot_params pointer */ - call efi_main - movq %rbx,%rsi - leaq rva(startup_64)(%rax), %rax - jmp *%rax + jmp efi_stub_entry SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9422fddfbc8f1..9661d5a5769e5 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -774,9 +774,9 @@ static void __noreturn enter_kernel(unsigned long kernel_addr, * return. On failure, it will exit to the firmware via efi_exit() instead of * returning. */ -asmlinkage unsigned long efi_main(efi_handle_t handle, - efi_system_table_t *sys_table_arg, - struct boot_params *boot_params) +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) { unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; @@ -919,7 +919,19 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, enter_kernel(bzimage_addr, boot_params); fail: - efi_err("efi_main() failed!\n"); + efi_err("efi_stub_entry() failed!\n"); efi_exit(handle, status); } + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +#ifndef CONFIG_EFI_MIXED +extern __alias(efi_stub_entry) +void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + +extern __alias(efi_stub_entry) +void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); +#endif +#endif -- GitLab From 1f3fd81bff03355c3acc8558c3c4da2f2d4e1d18 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:42 +0100 Subject: [PATCH 0586/1418] x86/decompressor: Avoid magic offsets for EFI handover entrypoint From: Ard Biesheuvel [ Commit 12792064587623065250069d1df980e2c9ac3e67 upstream ] The native 32-bit or 64-bit EFI handover protocol entrypoint offset relative to the respective startup_32/64 address is described in boot_params as handover_offset, so that the special Linux/x86 aware EFI loader can find it there. When mixed mode is enabled, this single field has to describe this offset for both the 32-bit and 64-bit entrypoints, so their respective relative offsets have to be identical. Given that startup_32 and startup_64 are 0x200 bytes apart, and the EFI handover entrypoint resides at a fixed offset, the 32-bit and 64-bit versions of those entrypoints must be exactly 0x200 bytes apart as well. Currently, hard-coded fixed offsets are used to ensure this, but it is sufficient to emit the 64-bit entrypoint 0x200 bytes after the 32-bit one, wherever it happens to reside. This allows this code (which is now EFI mixed mode specific) to be moved into efi_mixed.S and out of the startup code in head_64.S. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-6-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 20 +++++++++++++++++++- arch/x86/boot/compressed/head_64.S | 18 ------------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index d05f0250bbbc2..deb36129e3a95 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -146,6 +146,16 @@ SYM_FUNC_START(__efi64_thunk) SYM_FUNC_END(__efi64_thunk) .code32 +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +SYM_FUNC_START(efi32_stub_entry) + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) +#endif + /* * EFI service pointer must be in %edi. * @@ -226,7 +236,7 @@ SYM_FUNC_END(efi_enter32) * stub may still exit and return to the firmware using the Exit() EFI boot * service.] */ -SYM_FUNC_START(efi32_entry) +SYM_FUNC_START_LOCAL(efi32_entry) call 1f 1: pop %ebx @@ -326,6 +336,14 @@ SYM_FUNC_START(efi32_pe_entry) RET SYM_FUNC_END(efi32_pe_entry) +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + .org efi32_stub_entry + 0x200 + .code64 +SYM_FUNC_START_NOALIGN(efi64_stub_entry) + jmp efi_stub_entry +SYM_FUNC_END(efi64_stub_entry) +#endif + .section ".rodata" /* EFI loaded image protocol GUID */ .balign 4 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d4ccae574c4f3..9a0d83b4d266d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -286,17 +286,6 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) - .org 0x190 -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - jmp efi32_entry -SYM_FUNC_END(efi32_stub_entry) -#endif - .code64 .org 0x200 SYM_CODE_START(startup_64) @@ -474,13 +463,6 @@ SYM_CODE_START(startup_64) jmp *%rax SYM_CODE_END(startup_64) -#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - jmp efi_stub_entry -SYM_FUNC_END(efi64_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) -- GitLab From 34378d7ad273ff859c1ed9ab77bb71e55f652b06 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:43 +0100 Subject: [PATCH 0587/1418] x86/efistub: Clear BSS in EFI handover protocol entrypoint From: Ard Biesheuvel [ Commit d7156b986d4cc0657fa6dc05c9fcf51c3d55a0fe upstream ] The so-called EFI handover protocol is value-add from the distros that permits a loader to simply copy a PE kernel image into memory and call an alternative entrypoint that is described by an embedded boot_params structure. Most implementations of this protocol do not bother to check the PE header for minimum alignment, section placement, etc, and therefore also don't clear the image's BSS, or even allocate enough memory for it. Allocating more memory on the fly is rather difficult, but at least clear the BSS region explicitly when entering in this manner, so that the EFI stub code does not get confused by global variables that were not zero-initialized correctly. When booting in mixed mode, this BSS clearing must occur before any global state is created, so clear it in the 32-bit asm entry point. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-7-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 14 +++++++++++++- drivers/firmware/efi/libstub/x86-stub.c | 13 +++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index deb36129e3a95..d6d1b76b594d9 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -148,6 +148,18 @@ SYM_FUNC_END(__efi64_thunk) .code32 #ifdef CONFIG_EFI_HANDOVER_PROTOCOL SYM_FUNC_START(efi32_stub_entry) + call 1f +1: popl %ecx + + /* Clear BSS */ + xorl %eax, %eax + leal (_bss - 1b)(%ecx), %edi + leal (_ebss - 1b)(%ecx), %ecx + subl %edi, %ecx + shrl $2, %ecx + cld + rep stosl + add $0x4, %esp /* Discard return address */ popl %ecx popl %edx @@ -340,7 +352,7 @@ SYM_FUNC_END(efi32_pe_entry) .org efi32_stub_entry + 0x200 .code64 SYM_FUNC_START_NOALIGN(efi64_stub_entry) - jmp efi_stub_entry + jmp efi_handover_entry SYM_FUNC_END(efi64_stub_entry) #endif diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9661d5a5769e5..764bac6b58f96 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -925,12 +925,21 @@ fail: } #ifdef CONFIG_EFI_HANDOVER_PROTOCOL +void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) +{ + extern char _bss[], _ebss[]; + + memset(_bss, 0, _ebss - _bss); + efi_stub_entry(handle, sys_table_arg, boot_params); +} + #ifndef CONFIG_EFI_MIXED -extern __alias(efi_stub_entry) +extern __alias(efi_handover_entry) void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params); -extern __alias(efi_stub_entry) +extern __alias(efi_handover_entry) void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params); #endif -- GitLab From 8ff6d88c0443acdd4199aacb69f1dd4a24120e8e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:45 +0100 Subject: [PATCH 0588/1418] efi/libstub: Add memory attribute protocol definitions From: Evgeniy Baskov [ Commit 79729f26b074a5d2722c27fa76cc45ef721e65cd upstream ] EFI_MEMORY_ATTRIBUTE_PROTOCOL servers as a better alternative to DXE services for setting memory attributes in EFI Boot Services environment. This protocol is better since it is a part of UEFI specification itself and not UEFI PI specification like DXE services. Add EFI_MEMORY_ATTRIBUTE_PROTOCOL definitions. Support mixed mode properly for its calls. Tested-by: Mario Limonciello Signed-off-by: Evgeniy Baskov Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/efi.h | 7 +++++++ drivers/firmware/efi/libstub/efistub.h | 20 ++++++++++++++++++++ include/linux/efi.h | 1 + 3 files changed, 28 insertions(+) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 233ae6986d6f2..522ff2e443b37 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -325,6 +325,13 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) +/* Memory Attribute Protocol */ +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 002f02a6d3598..6f5a1a16db15b 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -419,6 +419,26 @@ union efi_dxe_services_table { } mixed_mode; }; +typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t; + +union efi_memory_attribute_protocol { + struct { + efi_status_t (__efiapi *get_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *); + + efi_status_t (__efiapi *set_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + + efi_status_t (__efiapi *clear_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + }; + struct { + u32 get_memory_attributes; + u32 set_memory_attributes; + u32 clear_memory_attributes; + } mixed_mode; +}; + typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; union efi_uga_draw_protocol { diff --git a/include/linux/efi.h b/include/linux/efi.h index 4e1bfee9675d2..de6d6558a4d30 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -390,6 +390,7 @@ void efi_native_runtime_setup(void); #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) +#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) -- GitLab From 476a48cd37c948b160cc3d5ff5b4d2e711f1ca36 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:46 +0100 Subject: [PATCH 0589/1418] efi/libstub: Add limit argument to efi_random_alloc() From: Ard Biesheuvel [ Commit bc5ddceff4c14494d83449ad45c985e6cd353fce upstream ] x86 will need to limit the kernel memory allocation to the lowest 512 MiB of memory, to match the behavior of the existing bare metal KASLR physical randomization logic. So in preparation for that, add a limit parameter to efi_random_alloc() and wire it up. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-22-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- drivers/firmware/efi/libstub/efistub.h | 2 +- drivers/firmware/efi/libstub/randomalloc.c | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 40275c3131c8e..16377b4521190 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -181,7 +181,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed, - EFI_LOADER_CODE); + EFI_LOADER_CODE, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6f5a1a16db15b..8a343ea1231a2 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -905,7 +905,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type); + int memory_type, unsigned long alloc_limit); efi_status_t efi_random_get_seed(void); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 1692d19ae80f0..ed6f6087a9eac 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -16,7 +16,8 @@ */ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, - unsigned long align_shift) + unsigned long align_shift, + u64 alloc_limit) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)EFI_ALLOC_LIMIT); + alloc_limit); if (region_end < size) return 0; @@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type) + int memory_type, + unsigned long alloc_limit) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align)); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) -- GitLab From 350265a753d8b39e2bb11660f2109c8dd5306b45 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:47 +0100 Subject: [PATCH 0590/1418] x86/efistub: Perform 4/5 level paging switch from the stub From: Ard Biesheuvel [ Commit cb380000dd23cbbf8bd7d023b51896804c1f7e68 upstream ] In preparation for updating the EFI stub boot flow to avoid the bare metal decompressor code altogether, implement the support code for switching between 4 and 5 levels of paging before jumping to the kernel proper. This reuses the newly refactored trampoline that the bare metal decompressor uses, but relies on EFI APIs to allocate 32-bit addressable memory and remap it with the appropriate permissions. Given that the bare metal decompressor will no longer call into the trampoline if the number of paging levels is already set correctly, it is no longer needed to remove NX restrictions from the memory range where this trampoline may end up. Acked-by: Kirill A. Shutemov Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 1 + .../firmware/efi/libstub/efi-stub-helper.c | 2 + drivers/firmware/efi/libstub/efistub.h | 1 + drivers/firmware/efi/libstub/x86-5lvl.c | 95 +++++++++++++++++++ drivers/firmware/efi/libstub/x86-stub.c | 40 +++----- drivers/firmware/efi/libstub/x86-stub.h | 17 ++++ 6 files changed, 130 insertions(+), 26 deletions(-) create mode 100644 drivers/firmware/efi/libstub/x86-5lvl.c create mode 100644 drivers/firmware/efi/libstub/x86-stub.h diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b6e1dcb98a64c..473ef18421db0 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o lib-$(CONFIG_X86) += x86-stub.o +lib-$(CONFIG_X86_64) += x86-5lvl.o lib-$(CONFIG_RISCV) += riscv-stub.o lib-$(CONFIG_LOONGARCH) += loongarch-stub.o diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3d9b2469a0dfd..97744822dd951 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline) efi_loglevel = CONSOLE_LOGLEVEL_QUIET; } else if (!strcmp(param, "noinitrd")) { efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 8a343ea1231a2..4b4055877f3d3 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -33,6 +33,7 @@ #define EFI_ALLOC_LIMIT ULONG_MAX #endif +extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c new file mode 100644 index 0000000000000..479dd445acdcf --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-5lvl.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#include +#include +#include + +#include "efistub.h" +#include "x86-stub.h" + +bool efi_no5lvl; + +static void (*la57_toggle)(void *cr3); + +static const struct desc_struct gdt[] = { + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), +}; + +/* + * Enabling (or disabling) 5 level paging is tricky, because it can only be + * done from 32-bit mode with paging disabled. This means not only that the + * code itself must be running from 32-bit addressable physical memory, but + * also that the root page table must be 32-bit addressable, as programming + * a 64-bit value into CR3 when running in 32-bit mode is not supported. + */ +efi_status_t efi_setup_5level_paging(void) +{ + u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; + efi_status_t status; + u8 *la57_code; + + if (!efi_is_64bit()) + return EFI_SUCCESS; + + /* check for 5 level paging support */ + if (native_cpuid_eax(0) < 7 || + !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return EFI_SUCCESS; + + /* allocate some 32-bit addressable memory for code and a page table */ + status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, + U32_MAX); + if (status != EFI_SUCCESS) + return status; + + la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); + memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); + + /* + * To avoid the need to allocate a 32-bit addressable stack, the + * trampoline uses a LJMP instruction to switch back to long mode. + * LJMP takes an absolute destination address, which needs to be + * fixed up at runtime. + */ + *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; + + efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); + + return EFI_SUCCESS; +} + +void efi_5level_switch(void) +{ + bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; + bool have_la57 = native_read_cr4() & X86_CR4_LA57; + bool need_toggle = want_la57 ^ have_la57; + u64 *pgt = (void *)la57_toggle + PAGE_SIZE; + u64 *cr3 = (u64 *)__native_read_cr3(); + u64 *new_cr3; + + if (!la57_toggle || !need_toggle) + return; + + if (!have_la57) { + /* + * 5 level paging will be enabled, so a root level page needs + * to be allocated from the 32-bit addressable physical region, + * with its first entry referring to the existing hierarchy. + */ + new_cr3 = memset(pgt, 0, PAGE_SIZE); + new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; + } else { + /* take the new root table pointer from the current entry #0 */ + new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); + + /* copy the new root table if it is not 32-bit addressable */ + if ((u64)new_cr3 > U32_MAX) + new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); + } + + native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); + + la57_toggle(new_cr3); +} diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 764bac6b58f96..adaddd38d97d1 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -17,6 +17,7 @@ #include #include "efistub.h" +#include "x86-stub.h" /* Maximum physical address for 64-bit kernel with 4-level paging */ #define MAXMEM_X86_64_4LEVEL (1ull << 46) @@ -212,8 +213,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -static void -adjust_memory_range_protection(unsigned long start, unsigned long size) +void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -267,35 +268,14 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) } } -/* - * Trampoline takes 2 pages and can be loaded in first megabyte of memory - * with its end placed between 128k and 640k where BIOS might start. - * (see arch/x86/boot/compressed/pgtable_64.c) - * - * We cannot find exact trampoline placement since memory map - * can be modified by UEFI, and it can alter the computed address. - */ - -#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) -#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) - extern const u8 startup_32[], startup_64[]; static void setup_memory_protection(unsigned long image_base, unsigned long image_size) { - /* - * Allow execution of possible trampoline used - * for switching between 4- and 5-level page tables - * and relocated kernel image. - */ - - adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, - TRAMPOLINE_PLACEMENT_SIZE); - #ifdef CONFIG_64BIT if (image_base != (unsigned long)startup_32) - adjust_memory_range_protection(image_base, image_size); + efi_adjust_memory_range_protection(image_base, image_size); #else /* * Clear protection flags on a whole range of possible @@ -305,8 +285,8 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size) * need to remove possible protection on relocated image * itself disregarding further relocations. */ - adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); + efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, + KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); #endif } @@ -796,6 +776,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_dxe_table = NULL; } + status = efi_setup_5level_paging(); + if (status != EFI_SUCCESS) { + efi_err("efi_setup_5level_paging() failed!\n"); + goto fail; + } + /* * If the kernel isn't already loaded at a suitable address, * relocate it. @@ -914,6 +900,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } + efi_5level_switch(); + if (IS_ENABLED(CONFIG_X86_64)) bzimage_addr += startup_64 - startup_32; diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h new file mode 100644 index 0000000000000..37c5a36b9d8cf --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +extern void trampoline_32bit_src(void *, bool); +extern const u16 trampoline_ljmp_imm_offset; + +void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); + +#ifdef CONFIG_X86_64 +efi_status_t efi_setup_5level_paging(void); +void efi_5level_switch(void); +#else +static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } +static inline void efi_5level_switch(void) {} +#endif -- GitLab From 5a664585a71c3af82a64aa9b38cadfa02f11c841 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:48 +0100 Subject: [PATCH 0591/1418] x86/decompressor: Factor out kernel decompression and relocation From: Ard Biesheuvel [ Commit 83381519352d6b5b3e429bf72aaab907480cb6b6 upstream ] Factor out the decompressor sequence that invokes the decompressor, parses the ELF and applies the relocations so that it can be called directly from the EFI stub. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-21-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.c | 29 ++++++++++++++++++++++++----- arch/x86/include/asm/boot.h | 8 ++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index e4e3e49fcc374..fb55ac18af6fa 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,11 +330,33 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_total_size = VO__end - VO__text; + static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); extern unsigned char input_data[]; extern unsigned int input_len, output_len; +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)) +{ + unsigned long entry; + + if (!free_mem_ptr) { + free_mem_ptr = (unsigned long)boot_heap; + free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); + } + + if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, + NULL, error) < 0) + return ULONG_MAX; + + entry = parse_elf(outbuf); + handle_relocations(outbuf, output_len, virt_addr); + + return entry; +} + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -354,7 +376,6 @@ extern unsigned int input_len, output_len; */ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { - const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; memptr heap = (memptr)boot_heap; unsigned long needed_size; @@ -457,10 +478,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) #endif debug_putstr("\nDecompressing Linux... "); - __decompress(input_data, input_len, NULL, NULL, output, output_len, - NULL, error); - entry_offset = parse_elf(output); - handle_relocations(output, output_len, virt_addr); + + entry_offset = decompress_kernel(output, virt_addr, error); debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); debug_puthex(entry_offset); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 215d37f7dde8a..b3a7cfb0d99e0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -79,4 +79,12 @@ # define BOOT_STACK_SIZE 0x1000 #endif +#ifndef __ASSEMBLY__ +extern unsigned int output_len; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); +#endif + #endif /* _ASM_X86_BOOT_H */ -- GitLab From 77330c123d7c443936585f25b31d3979876ba1d0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:49 +0100 Subject: [PATCH 0592/1418] x86/efistub: Prefer EFI memory attributes protocol over DXE services From: Ard Biesheuvel [ Commit 11078876b7a6a1b7226344fecab968945c806832 upstream ] Currently, the EFI stub relies on DXE services in some cases to clear non-execute restrictions from page allocations that need to be executable. This is dodgy, because DXE services are not specified by UEFI but by PI, and they are not intended for consumption by OS loaders. However, no alternative existed at the time. Now, there is a new UEFI protocol that should be used instead, so if it exists, prefer it over the DXE services calls. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-18-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 29 ++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index adaddd38d97d1..01af018b93158 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -26,6 +26,7 @@ const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; +static efi_memory_attribute_protocol_t *memattr; static efi_status_t preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) @@ -222,12 +223,18 @@ void efi_adjust_memory_range_protection(unsigned long start, unsigned long rounded_start, rounded_end; unsigned long unprotect_start, unprotect_size; - if (efi_dxe_table == NULL) - return; - rounded_start = rounddown(start, EFI_PAGE_SIZE); rounded_end = roundup(start + size, EFI_PAGE_SIZE); + if (memattr != NULL) { + efi_call_proto(memattr, clear_memory_attributes, rounded_start, + rounded_end - rounded_start, EFI_MEMORY_XP); + return; + } + + if (efi_dxe_table == NULL) + return; + /* * Don't modify memory region attributes, they are * already suitable, to lower the possibility to @@ -758,6 +765,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params) { + efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; @@ -769,13 +777,18 @@ void __noreturn efi_stub_entry(efi_handle_t handle, if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); - efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); - if (efi_dxe_table && - efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { - efi_warn("Ignoring DXE services table: invalid signature\n"); - efi_dxe_table = NULL; + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { + efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); + if (efi_dxe_table && + efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { + efi_warn("Ignoring DXE services table: invalid signature\n"); + efi_dxe_table = NULL; + } } + /* grab the memory attributes protocol if it exists */ + efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr); + status = efi_setup_5level_paging(); if (status != EFI_SUCCESS) { efi_err("efi_setup_5level_paging() failed!\n"); -- GitLab From fff7614f576f802fb0f4ff169cb251c180ce377e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:50 +0100 Subject: [PATCH 0593/1418] x86/efistub: Perform SNP feature test while running in the firmware From: Ard Biesheuvel [ Commit 31c77a50992e8dd136feed7b67073bb5f1f978cc upstream ] Before refactoring the EFI stub boot flow to avoid the legacy bare metal decompressor, duplicate the SNP feature check in the EFI stub before handing over to the kernel proper. The SNP feature check can be performed while running under the EFI boot services, which means it can force the boot to fail gracefully and return an error to the bootloader if the loaded kernel does not implement support for all the features that the hypervisor enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-23-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/sev.c | 112 ++++++++++++++---------- arch/x86/include/asm/sev.h | 5 ++ drivers/firmware/efi/libstub/x86-stub.c | 17 ++++ 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 9c91cc40f4565..8b21c57bc4700 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -327,20 +327,25 @@ static void enforce_vmpl0(void) */ #define SNP_FEATURES_PRESENT (0) +u64 snp_get_unsupported_features(u64 status) +{ + if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) + return 0; + + return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; +} + void snp_check_features(void) { u64 unsupported; - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) - return; - /* * Terminate the boot if hypervisor has enabled any feature lacking * guest side implementation. Pass on the unsupported features mask through * EXIT_INFO_2 of the GHCB protocol so that those features can be reported * as part of the guest boot failure. */ - unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + unsupported = snp_get_unsupported_features(sev_status); if (unsupported) { if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); @@ -350,35 +355,22 @@ void snp_check_features(void) } } -void sev_enable(struct boot_params *bp) +/* + * sev_check_cpu_support - Check for SEV support in the CPU capabilities + * + * Returns < 0 if SEV is not supported, otherwise the position of the + * encryption bit in the page table descriptors. + */ +static int sev_check_cpu_support(void) { unsigned int eax, ebx, ecx, edx; - struct msr m; - bool snp; - - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 to ensure that uninitialized values from - * buggy bootloaders aren't propagated. - */ - if (bp) - bp->cc_blob_address = 0; - - /* - * Do an initial SEV capability check before snp_init() which - * loads the CPUID page and the same checks afterwards are done - * without the hypervisor and are trustworthy. - * - * If the HV fakes SEV support, the guest will crash'n'burn - * which is good enough. - */ /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); if (eax < 0x8000001f) - return; + return -ENODEV; /* * Check for the SME/SEV feature: @@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp) native_cpuid(&eax, &ebx, &ecx, &edx); /* Check whether SEV is supported */ if (!(eax & BIT(1))) + return -ENODEV; + + return ebx & 0x3f; +} + +void sev_enable(struct boot_params *bp) +{ + struct msr m; + int bitpos; + bool snp; + + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + if (sev_check_cpu_support() < 0) return; /* @@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp) /* Now repeat the checks with the SNP CPUID table. */ - /* Recheck the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - - /* - * Recheck for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Check whether SEV is supported */ - if (!(eax & BIT(1))) { + bitpos = sev_check_cpu_support(); + if (bitpos < 0) { if (snp) error("SEV-SNP support indicated by CC blob, but not CPUID."); return; @@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp) if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); - sme_me_mask = BIT_ULL(ebx & 0x3f); + sme_me_mask = BIT_ULL(bitpos); +} + +/* + * sev_get_status - Retrieve the SEV status mask + * + * Returns 0 if the CPU is not SEV capable, otherwise the value of the + * AMD64_SEV MSR. + */ +u64 sev_get_status(void) +{ + struct msr m; + + if (sev_check_cpu_support() < 0) + return 0; + + boot_rdmsr(MSR_AMD64_SEV, &m); + return m.q; } /* Search for Confidential Computing blob in the EFI config table. */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7ca5c9ec8b52e..e231638ba19a4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -202,6 +202,8 @@ void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -225,6 +227,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } + +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 01af018b93158..8d3ce383bcbb9 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "efistub.h" #include "x86-stub.h" @@ -747,6 +748,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static bool have_unsupported_snp_features(void) +{ + u64 unsupported; + + unsupported = snp_get_unsupported_features(sev_get_status()); + if (unsupported) { + efi_err("Unsupported SEV-SNP features detected: 0x%llx\n", + unsupported); + return true; + } + return false; +} + static void __noreturn enter_kernel(unsigned long kernel_addr, struct boot_params *boot_params) { @@ -777,6 +791,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); + if (have_unsupported_snp_features()) + efi_exit(handle, EFI_UNSUPPORTED); + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); if (efi_dxe_table && -- GitLab From 2dfaeac3f38e4e550d215204eedd97a061fdc118 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:51 +0100 Subject: [PATCH 0594/1418] x86/efistub: Avoid legacy decompressor when doing EFI boot From: Ard Biesheuvel [ Commit a1b87d54f4e45ff5e0d081fb1d9db3bf1a8fb39a upstream ] The bare metal decompressor code was never really intended to run in a hosted environment such as the EFI boot services, and does a few things that are becoming problematic in the context of EFI boot now that the logo requirements are getting tighter: EFI executables will no longer be allowed to consist of a single executable section that is mapped with read, write and execute permissions if they are intended for use in a context where Secure Boot is enabled (and where Microsoft's set of certificates is used, i.e., every x86 PC built to run Windows). To avoid stepping on reserved memory before having inspected the E820 tables, and to ensure the correct placement when running a kernel build that is non-relocatable, the bare metal decompressor moves its own executable image to the end of the allocation that was reserved for it, in order to perform the decompression in place. This means the region in question requires both write and execute permissions, which either need to be given upfront (which EFI will no longer permit), or need to be applied on demand using the existing page fault handling framework. However, the physical placement of the kernel is usually randomized anyway, and even if it isn't, a dedicated decompression output buffer can be allocated anywhere in memory using EFI APIs when still running in the boot services, given that EFI support already implies a relocatable kernel. This means that decompression in place is never necessary, nor is moving the compressed image from one end to the other. Since EFI already maps all of memory 1:1, it is also unnecessary to create new page tables or handle page faults when decompressing the kernel. That means there is also no need to replace the special exception handlers for SEV. Generally, there is little need to do any of the things that the decompressor does beyond - initialize SEV encryption, if needed, - perform the 4/5 level paging switch, if needed, - decompress the kernel - relocate the kernel So do all of this from the EFI stub code, and avoid the bare metal decompressor altogether. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-24-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 5 + arch/x86/boot/compressed/efi_mixed.S | 55 -------- arch/x86/boot/compressed/head_32.S | 13 -- arch/x86/boot/compressed/head_64.S | 27 ---- arch/x86/include/asm/efi.h | 7 +- arch/x86/include/asm/sev.h | 2 + drivers/firmware/efi/libstub/x86-stub.c | 166 ++++++++++-------------- 7 files changed, 84 insertions(+), 191 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 0c9ebf74fac59..3965b2c9efee0 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) endif +ifeq ($(CONFIG_EFI_STUB),y) +# ensure that the static EFI stub library will be pulled in, even if it is +# never referenced explicitly from the startup code +LDFLAGS_vmlinux += -u efi_pe_entry +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index d6d1b76b594d9..8232c5b2a9bf5 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -275,10 +275,6 @@ SYM_FUNC_START_LOCAL(efi32_entry) jmp startup_32 SYM_FUNC_END(efi32_entry) -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - /* * efi_status_t efi32_pe_entry(efi_handle_t image_handle, * efi_system_table_32_t *sys_table) @@ -286,8 +282,6 @@ SYM_FUNC_END(efi32_entry) SYM_FUNC_START(efi32_pe_entry) pushl %ebp movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - pushl %ebx // save callee-save registers pushl %edi @@ -296,48 +290,8 @@ SYM_FUNC_START(efi32_pe_entry) movl $0x80000003, %eax // EFI_UNSUPPORTED jnz 2f - call 1f -1: pop %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal (loaded_image_proto - 1b)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebp // calculate image_offset - movl %ebp, (image_offset - 1b)(%ebx) // save image_offset xorl %esi, %esi jmp efi32_entry // pass %ecx, %edx, %esi // no other registers remain live @@ -356,15 +310,6 @@ SYM_FUNC_START_NOALIGN(efi64_stub_entry) SYM_FUNC_END(efi64_stub_entry) #endif - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) - .data .balign 8 SYM_DATA_START_LOCAL(efi32_boot_gdt) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3af4a383615b3..1cfe9802a42fe 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE leal startup_32@GOTOFF(%edx), %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32() will be at an - * offset to the start of the space allocated for the image. efi_pe_entry() will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl image_offset@GOTOFF(%edx), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 9a0d83b4d266d..0d7aef10b19ad 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -138,19 +138,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl rva(image_offset)(%ebp), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -327,20 +314,6 @@ SYM_CODE_START(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - movl image_offset(%rip), %eax - subq %rax, %rbp -#endif - movl BP_kernel_alignment(%rsi), %eax decl %eax addq %rax, %rbp diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 522ff2e443b37..e601264b1a243 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -88,6 +88,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -101,8 +103,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index e231638ba19a4..cf98fc28601fb 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -210,6 +211,7 @@ static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 8d3ce383bcbb9..61017921f9ca9 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,17 +15,14 @@ #include #include #include +#include #include #include "efistub.h" #include "x86-stub.h" -/* Maximum physical address for 64-bit kernel with 4-level paging */ -#define MAXMEM_X86_64_4LEVEL (1ull << 46) - const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; static efi_memory_attribute_protocol_t *memattr; @@ -276,33 +273,9 @@ void efi_adjust_memory_range_protection(unsigned long start, } } -extern const u8 startup_32[], startup_64[]; - -static void -setup_memory_protection(unsigned long image_base, unsigned long image_size) -{ -#ifdef CONFIG_64BIT - if (image_base != (unsigned long)startup_32) - efi_adjust_memory_range_protection(image_base, image_size); -#else - /* - * Clear protection flags on a whole range of possible - * addresses used for KASLR. We don't need to do that - * on x86_64, since KASLR/extraction is performed after - * dedicated identity page tables are built and we only - * need to remove possible protection on relocated image - * itself disregarding further relocations. - */ - efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); -#endif -} - static const efi_char16_t apple[] = L"Apple"; -static void setup_quirks(struct boot_params *boot_params, - unsigned long image_base, - unsigned long image_size) +static void setup_quirks(struct boot_params *boot_params) { efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) efi_table_attr(efi_system_table, fw_vendor); @@ -311,9 +284,6 @@ static void setup_quirks(struct boot_params *boot_params, if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) retrieve_apple_device_properties(boot_params); } - - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) - setup_memory_protection(image_base, image_size); } /* @@ -466,7 +436,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, } image_base = efi_table_attr(image, image_base); - image_offset = (void *)startup_32 - image_base; status = efi_allocate_pages(sizeof(struct boot_params), (unsigned long *)&boot_params, ULONG_MAX); @@ -761,6 +730,61 @@ static bool have_unsupported_snp_features(void) return false; } +static void efi_get_seed(void *seed, int size) +{ + efi_get_random_bytes(size, seed); + + /* + * This only updates seed[0] when running on 32-bit, but in that case, + * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit. + */ + *(unsigned long *)seed ^= kaslr_get_random_long("EFI"); +} + +static void error(char *str) +{ + efi_warn("Decompression failed: %s\n", str); +} + +static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) +{ + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long addr, alloc_size, entry; + efi_status_t status; + u32 seed[2] = {}; + + /* determine the required size of the allocation */ + alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size), + MIN_KERNEL_ALIGN); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + + efi_get_seed(seed, sizeof(seed)); + + virt_addr += (range * seed[1]) >> 32; + virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + } + + status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, + seed[0], EFI_LOADER_CODE, + EFI_X86_KERNEL_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + return status; + + entry = decompress_kernel((void *)addr, virt_addr, error); + if (entry == ULONG_MAX) { + efi_free(alloc_size, addr); + return EFI_LOAD_ERROR; + } + + *kernel_entry = addr + entry; + + efi_adjust_memory_range_protection(addr, kernel_total_size); + + return EFI_SUCCESS; +} + static void __noreturn enter_kernel(unsigned long kernel_addr, struct boot_params *boot_params) { @@ -780,10 +804,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, struct boot_params *boot_params) { efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; - unsigned long bzimage_addr = (unsigned long)startup_32; - unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; const struct linux_efi_initrd *initrd = NULL; + unsigned long kernel_entry; efi_status_t status; efi_system_table = sys_table_arg; @@ -812,60 +835,6 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } - /* - * If the kernel isn't already loaded at a suitable address, - * relocate it. - * - * It must be loaded above LOAD_PHYSICAL_ADDR. - * - * The maximum address for 64-bit is 1 << 46 for 4-level paging. This - * is defined as the macro MAXMEM, but unfortunately that is not a - * compile-time constant if 5-level paging is configured, so we instead - * define our own macro for use here. - * - * For 32-bit, the maximum address is complicated to figure out, for - * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what - * KASLR uses. - * - * Also relocate it if image_offset is zero, i.e. the kernel wasn't - * loaded by LoadImage, but rather by a bootloader that called the - * handover entry. The reason we must always relocate in this case is - * to handle the case of systemd-boot booting a unified kernel image, - * which is a PE executable that contains the bzImage and an initrd as - * COFF sections. The initrd section is placed after the bzImage - * without ensuring that there are at least init_size bytes available - * for the bzImage, and thus the compressed kernel's startup code may - * overwrite the initrd unless it is moved out of the way. - */ - - buffer_start = ALIGN(bzimage_addr - image_offset, - hdr->kernel_alignment); - buffer_end = buffer_start + hdr->init_size; - - if ((buffer_start < LOAD_PHYSICAL_ADDR) || - (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || - (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || - (image_offset == 0)) { - extern char _bss[]; - - status = efi_relocate_kernel(&bzimage_addr, - (unsigned long)_bss - bzimage_addr, - hdr->init_size, - hdr->pref_address, - hdr->kernel_alignment, - LOAD_PHYSICAL_ADDR); - if (status != EFI_SUCCESS) { - efi_err("efi_relocate_kernel() failed!\n"); - goto fail; - } - /* - * Now that we've copied the kernel elsewhere, we no longer - * have a set up block before startup_32(), so reset image_offset - * to zero in case it was set earlier. - */ - image_offset = 0; - } - #ifdef CONFIG_CMDLINE_BOOL status = efi_parse_options(CONFIG_CMDLINE); if (status != EFI_SUCCESS) { @@ -883,6 +852,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle, } } + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); + goto fail; + } + /* * At this point, an initrd may already have been loaded by the * bootloader and passed via bootparams. We permit an initrd loaded @@ -922,7 +897,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle, setup_efi_pci(boot_params); - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); + setup_quirks(boot_params); status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { @@ -930,12 +905,15 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } - efi_5level_switch(); + /* + * Call the SEV init code while still running with the firmware's + * GDT/IDT, so #VC exceptions will be handled by EFI. + */ + sev_enable(boot_params); - if (IS_ENABLED(CONFIG_X86_64)) - bzimage_addr += startup_64 - startup_32; + efi_5level_switch(); - enter_kernel(bzimage_addr, boot_params); + enter_kernel(kernel_entry, boot_params); fail: efi_err("efi_stub_entry() failed!\n"); -- GitLab From 1b54062576792b41f0acb8d562deea7c4c718c33 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:52 +0100 Subject: [PATCH 0595/1418] efi/x86: Avoid physical KASLR on older Dell systems From: Ard Biesheuvel [ Commit 50d7cdf7a9b1ab6f4f74a69c84e974d5dc0c1bf1 upstream ] River reports boot hangs with v6.6 and v6.7, and the bisect points to commit a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") which moves the memory allocation and kernel decompression from the legacy decompressor (which executes *after* ExitBootServices()) to the EFI stub, using boot services for allocating the memory. The memory allocation succeeds but the subsequent call to decompress_kernel() never returns, resulting in a failed boot and a hanging system. As it turns out, this issue only occurs when physical address randomization (KASLR) is enabled, and given that this is a feature we can live without (virtual KASLR is much more important), let's disable the physical part of KASLR when booting on AMI UEFI firmware claiming to implement revision v2.0 of the specification (which was released in 2006), as this is the version these systems advertise. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218173 Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 31 +++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 61017921f9ca9..47ebc85c0d22e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -273,17 +273,20 @@ void efi_adjust_memory_range_protection(unsigned long start, } } +static efi_char16_t *efistub_fw_vendor(void) +{ + unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); + + return (efi_char16_t *)vendor; +} + static const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor); - - if (!memcmp(fw_vendor, apple, sizeof(apple))) { - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) - retrieve_apple_device_properties(boot_params); - } + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); } /* @@ -759,11 +762,25 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + static const efi_char16_t ami[] = L"American Megatrends"; efi_get_seed(seed, sizeof(seed)); virt_addr += (range * seed[1]) >> 32; virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + + /* + * Older Dell systems with AMI UEFI firmware v2.0 may hang + * while decompressing the kernel if physical address + * randomization is enabled. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=218173 + */ + if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; + } } status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, -- GitLab From 86c909d2275b91fb34be07b081c7343a0c2351f2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:53 +0100 Subject: [PATCH 0596/1418] x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR From: Ard Biesheuvel [ Commit 2f77465b05b1270c832b5e2ee27037672ad2a10a upstream ] The EFI stub's kernel placement logic randomizes the physical placement of the kernel by taking all available memory into account, and picking a region at random, based on a random seed. When KASLR is disabled, this seed is set to 0x0, and this results in the lowest available region of memory to be selected for loading the kernel, even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is typically reserved for the GFP_DMA region, to accommodate masters that can only access the first 16 MiB of system memory. Even if such devices are rare these days, we may still end up with a warning in the kernel log, as reported by Tom: swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0 Fix this by tweaking the random allocation logic to accept a low bound on the placement, and set it to LOAD_PHYSICAL_ADDR. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Reported-by: Tom Englund Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404 Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- drivers/firmware/efi/libstub/efistub.h | 3 ++- drivers/firmware/efi/libstub/randomalloc.c | 12 +++++++----- drivers/firmware/efi/libstub/x86-stub.c | 1 + 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 16377b4521190..16f15e36f9a7d 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -181,7 +181,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed, - EFI_LOADER_CODE, EFI_ALLOC_LIMIT); + EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 4b4055877f3d3..6741f3d900c5a 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -906,7 +906,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type, unsigned long alloc_limit); + int memory_type, unsigned long alloc_min, + unsigned long alloc_max); efi_status_t efi_random_get_seed(void); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index ed6f6087a9eac..7ba05719a53ba 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -17,7 +17,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, unsigned long align_shift, - u64 alloc_limit) + u64 alloc_min, u64 alloc_max) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -30,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - alloc_limit); + alloc_max); if (region_end < size) return 0; - first_slot = round_up(md->phys_addr, align); + first_slot = round_up(max(md->phys_addr, alloc_min), align); last_slot = round_down(region_end - size + 1, align); if (first_slot > last_slot) @@ -56,7 +56,8 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long *addr, unsigned long random_seed, int memory_type, - unsigned long alloc_limit) + unsigned long alloc_min, + unsigned long alloc_max) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -78,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, + alloc_max); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 47ebc85c0d22e..c1dcc86fcc3d2 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -785,6 +785,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, seed[0], EFI_LOADER_CODE, + LOAD_PHYSICAL_ADDR, EFI_X86_KERNEL_ALLOC_LIMIT); if (status != EFI_SUCCESS) return status; -- GitLab From 8f05493706ff8296d26b449db295b1dbb1de31dd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:54 +0100 Subject: [PATCH 0597/1418] x86/boot: Rename conflicting 'boot_params' pointer to 'boot_params_ptr' From: Ard Biesheuvel [ Commit b9e909f78e7e4b826f318cfe7bedf3ce229920e6 upstream ] The x86 decompressor is built and linked as a separate executable, but it shares components with the kernel proper, which are either #include'd as C files, or linked into the decompresor as a static library (e.g, the EFI stub) Both the kernel itself and the decompressor define a global symbol 'boot_params' to refer to the boot_params struct, but in the former case, it refers to the struct directly, whereas in the decompressor, it refers to a global pointer variable referring to the struct boot_params passed by the bootloader or constructed from scratch. This ambiguity is unfortunate, and makes it impossible to assign this decompressor variable from the x86 EFI stub, given that declaring it as extern results in a clash. So rename the decompressor version (whose scope is limited) to boot_params_ptr. [ mingo: Renamed 'boot_params_p' to 'boot_params_ptr' for clarity ] Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/acpi.c | 14 ++++++------- arch/x86/boot/compressed/cmdline.c | 4 ++-- arch/x86/boot/compressed/ident_map_64.c | 7 ++++--- arch/x86/boot/compressed/kaslr.c | 26 ++++++++++++------------- arch/x86/boot/compressed/misc.c | 24 +++++++++++------------ arch/x86/boot/compressed/misc.h | 1 - arch/x86/boot/compressed/pgtable_64.c | 9 ++++----- arch/x86/boot/compressed/sev.c | 2 +- arch/x86/include/asm/boot.h | 2 ++ 9 files changed, 45 insertions(+), 44 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 9caf89063e775..55c98fdd67d2b 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to * ACPI_TABLE_GUID because it has more features. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_20_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; @@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void) enum efi_type et; int ret; - et = efi_get_type(boot_params); + et = efi_get_type(boot_params_ptr); if (et == EFI_TYPE_NONE) return 0; - systab_pa = efi_get_system_table(boot_params); + systab_pa = efi_get_system_table(boot_params_ptr); if (!systab_pa) error("EFI support advertised, but unable to locate system table."); - ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); + ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len); if (ret || !cfg_tbl_pa) error("EFI config table not found."); @@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void) { acpi_physical_address pa; - pa = boot_params->acpi_rsdp_addr; + pa = boot_params_ptr->acpi_rsdp_addr; if (!pa) pa = efi_get_rsdp_addr(); @@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void) rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); if (!rsdp) rsdp = (struct acpi_table_rsdp *)(long) - boot_params->acpi_rsdp_addr; + boot_params_ptr->acpi_rsdp_addr; if (!rsdp) return 0; diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index f1add5d85da9d..c1bb180973ea2 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr) #include "../cmdline.c" unsigned long get_cmd_line_ptr(void) { - unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr; - cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; + cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32; return cmd_line_ptr; } diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d34222816c9f5..b8c42339bc355 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode) * or does not touch all the pages covering them. */ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); - boot_params = rmode; - kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); + boot_params_ptr = rmode; + kernel_add_identity_map((unsigned long)boot_params_ptr, + (unsigned long)(boot_params_ptr + 1)); cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); @@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode) * Also map the setup_data entries passed via boot_params in case they * need to be accessed by uncompressed kernel via the identity mapping. */ - sd = (struct setup_data *)boot_params->hdr.setup_data; + sd = (struct setup_data *)boot_params_ptr->hdr.setup_data; while (sd) { unsigned long sd_addr = (unsigned long)sd; diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e476bcbd9b422..9794d9174795d 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void) unsigned long hash = 0; hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); + hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr)); return hash; } @@ -383,7 +383,7 @@ static void handle_mem_options(void) static void mem_avoid_init(unsigned long input, unsigned long input_size, unsigned long output) { - unsigned long init_size = boot_params->hdr.init_size; + unsigned long init_size = boot_params_ptr->hdr.init_size; u64 initrd_start, initrd_size; unsigned long cmd_line, cmd_line_size; @@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; /* Avoid initrd. */ - initrd_start = (u64)boot_params->ext_ramdisk_image << 32; - initrd_start |= boot_params->hdr.ramdisk_image; - initrd_size = (u64)boot_params->ext_ramdisk_size << 32; - initrd_size |= boot_params->hdr.ramdisk_size; + initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32; + initrd_start |= boot_params_ptr->hdr.ramdisk_image; + initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32; + initrd_size |= boot_params_ptr->hdr.ramdisk_size; mem_avoid[MEM_AVOID_INITRD].start = initrd_start; mem_avoid[MEM_AVOID_INITRD].size = initrd_size; /* No need to set mapping for initrd, it will be handled in VO. */ @@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, } /* Avoid boot parameters. */ - mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; - mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr); /* We don't need to set a mapping for setup_data. */ @@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, } /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; + ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; while (ptr) { struct mem_vector avoid; @@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region, static bool process_efi_entries(unsigned long minimum, unsigned long image_size) { - struct efi_info *e = &boot_params->efi_info; + struct efi_info *e = &boot_params_ptr->efi_info; bool efi_mirror_found = false; struct mem_vector region; efi_memory_desc_t *md; @@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum, struct boot_e820_entry *entry; /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < boot_params->e820_entries; i++) { - entry = &boot_params->e820_table[i]; + for (i = 0; i < boot_params_ptr->e820_entries; i++) { + entry = &boot_params_ptr->e820_table[i]; /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) continue; @@ -836,7 +836,7 @@ void choose_random_location(unsigned long input, return; } - boot_params->hdr.loadflags |= KASLR_FLAG; + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; if (IS_ENABLED(CONFIG_X86_32)) mem_limit = KERNEL_IMAGE_SIZE; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index fb55ac18af6fa..8ae7893d712ff 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n); /* * This is set up by the setup-routine at boot-time */ -struct boot_params *boot_params; +struct boot_params *boot_params_ptr; struct port_io_ops pio_ops; @@ -132,8 +132,8 @@ void __putstr(const char *s) if (lines == 0 || cols == 0) return; - x = boot_params->screen_info.orig_x; - y = boot_params->screen_info.orig_y; + x = boot_params_ptr->screen_info.orig_x; + y = boot_params_ptr->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -154,8 +154,8 @@ void __putstr(const char *s) } } - boot_params->screen_info.orig_x = x; - boot_params->screen_info.orig_y = y; + boot_params_ptr->screen_info.orig_x = x; + boot_params_ptr->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -382,14 +382,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) size_t entry_offset; /* Retain x86 boot parameters pointer passed from startup_32/64. */ - boot_params = rmode; + boot_params_ptr = rmode; /* Clear flags intended for solely in-kernel use. */ - boot_params->hdr.loadflags &= ~KASLR_FLAG; + boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(boot_params); + sanitize_boot_params(boot_params_ptr); - if (boot_params->screen_info.orig_video_mode == 7) { + if (boot_params_ptr->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -397,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) vidport = 0x3d4; } - lines = boot_params->screen_info.orig_video_lines; - cols = boot_params->screen_info.orig_video_cols; + lines = boot_params_ptr->screen_info.orig_video_lines; + cols = boot_params_ptr->screen_info.orig_video_cols; init_default_io_ops(); @@ -417,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) * so that early debugging output from the RSDP parsing code can be * collected. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr(); debug_putstr("early console in extract_kernel\n"); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b6e46435b90b8..254acd76efde2 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,7 +52,6 @@ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; void *malloc(int size); void free(void *where); -extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); #define error_putstr(__x) __putstr(__x) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 7939eb6e6ce9b..51f957b24ba7a 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -28,7 +28,6 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; */ unsigned long *trampoline_32bit __section(".data"); -extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); static unsigned long find_trampoline_placement(void) @@ -49,7 +48,7 @@ static unsigned long find_trampoline_placement(void) * * Only look for values in the legacy ROM for non-EFI system. */ - signature = (char *)&boot_params->efi_info.efi_loader_signature; + signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature; if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { ebda_start = *(unsigned short *)0x40e << 4; @@ -65,10 +64,10 @@ static unsigned long find_trampoline_placement(void) bios_start = round_down(bios_start, PAGE_SIZE); /* Find the first usable memory region under bios_start. */ - for (i = boot_params->e820_entries - 1; i >= 0; i--) { + for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) { unsigned long new = bios_start; - entry = &boot_params->e820_table[i]; + entry = &boot_params_ptr->e820_table[i]; /* Skip all entries above bios_start. */ if (bios_start <= entry->addr) @@ -107,7 +106,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = bp; + boot_params_ptr = bp; /* * Check if LA57 is desired and supported. diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 8b21c57bc4700..d07e665bb265b 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -565,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) * accessed after switchover. */ if (sev_snp_enabled()) { - unsigned long cc_info_pa = boot_params->cc_blob_address; + unsigned long cc_info_pa = boot_params_ptr->cc_blob_address; struct cc_blob_sev_info *cc_info; kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index b3a7cfb0d99e0..a38cc0afc90a0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -85,6 +85,8 @@ extern const unsigned long kernel_total_size; unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, void (*error)(char *x)); + +extern struct boot_params *boot_params_ptr; #endif #endif /* _ASM_X86_BOOT_H */ -- GitLab From 3a396c409a39ce701533f3f55f3db0ab700aaeae Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:55 +0100 Subject: [PATCH 0598/1418] x86/boot: efistub: Assign global boot_params variable From: Ard Biesheuvel [ Commit 50dcc2e0d62e3c4a54f39673c4dc3dcde7c74d52 upstream ] Now that the x86 EFI stub calls into some APIs exposed by the decompressor (e.g., kaslr_get_random_long()), it is necessary to ensure that the global boot_params variable is set correctly before doing so. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index c1dcc86fcc3d2..b183b40195eee 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -827,6 +827,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, unsigned long kernel_entry; efi_status_t status; + boot_params_ptr = boot_params; + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) -- GitLab From 2402392bed4e440e05442fb1de4ef97536ff5a96 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:56 +0100 Subject: [PATCH 0599/1418] efi/x86: Fix the missing KASLR_FLAG bit in boot_params->hdr.loadflags From: Yuntao Wang [ Commit 01638431c465741e071ab34acf3bef3c2570f878 upstream ] When KASLR is enabled, the KASLR_FLAG bit in boot_params->hdr.loadflags should be set to 1 to propagate KASLR status from compressed kernel to kernel, just as the choose_random_location() function does. Currently, when the kernel is booted via the EFI stub, the KASLR_FLAG bit in boot_params->hdr.loadflags is not set, even though it should be. This causes some functions, such as kernel_randomize_memory(), not to execute as expected. Fix it. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Signed-off-by: Yuntao Wang [ardb: drop 'else' branch clearing KASLR_FLAG] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index b183b40195eee..a0757a37b482b 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -781,6 +781,8 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); seed[0] = 0; } + + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; } status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, -- GitLab From c4c795b21dd23d9514ae1c6646c3fb2c78b5be60 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 Feb 2024 09:46:57 -0800 Subject: [PATCH 0600/1418] af_unix: Drop oob_skb ref before purging queue in GC. commit aa82ac51d63328714645c827775d64dbfd9941f3 upstream. syzbot reported another task hung in __unix_gc(). [0] The current while loop assumes that all of the left candidates have oob_skb and calling kfree_skb(oob_skb) releases the remaining candidates. However, I missed a case that oob_skb has self-referencing fd and another fd and the latter sk is placed before the former in the candidate list. Then, the while loop never proceeds, resulting the task hung. __unix_gc() has the same loop just before purging the collected skb, so we can call kfree_skb(oob_skb) there and let __skb_queue_purge() release all inflight sockets. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 2784 Comm: kworker/u4:8 Not tainted 6.8.0-rc4-syzkaller-01028-g71b605d32017 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: events_unbound __unix_gc RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x70 kernel/kcov.c:200 Code: 89 fb e8 23 00 00 00 48 8b 3d 84 f5 1a 0c 48 89 de 5b e9 43 26 57 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1e fa 48 8b 04 24 65 48 8b 0d 90 52 70 7e 65 8b 15 91 52 70 RSP: 0018:ffffc9000a17fa78 EFLAGS: 00000287 RAX: ffffffff8a0a6108 RBX: ffff88802b6c2640 RCX: ffff88802c0b3b80 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffffc9000a17fbf0 R08: ffffffff89383f1d R09: 1ffff1100ee5ff84 R10: dffffc0000000000 R11: ffffed100ee5ff85 R12: 1ffff110056d84ee R13: ffffc9000a17fae0 R14: 0000000000000000 R15: ffffffff8f47b840 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffef5687ff8 CR3: 0000000029b34000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __unix_gc+0xe69/0xf40 net/unix/garbage.c:343 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Reported-and-tested-by: syzbot+ecab4d36f920c3574bf9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ecab4d36f920c3574bf9 Fixes: 25236c91b5ab ("af_unix: Fix task hung while purging oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/garbage.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9e1bab97c05ba..ab2c83d58b62a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -314,18 +322,6 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - while (!list_empty(&gc_candidates)) { - u = list_entry(gc_candidates.next, struct unix_sock, link); - if (u->oob_skb) { - struct sk_buff *skb = u->oob_skb; - - u->oob_skb = NULL; - kfree_skb(skb); - } - } -#endif - spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to -- GitLab From 2c96f66cd0cca5695ec326398f98b58f545ac087 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:33:43 +0100 Subject: [PATCH 0601/1418] phy: freescale: phy-fsl-imx8-mipi-dphy: Fix alias name to use dashes [ Upstream commit 7936378cb6d87073163130e1e1fc1e5f76a597cf ] Devicetree spec lists only dashes as valid characters for alias names. Table 3.2: Valid characters for alias names, Devicee Specification, Release v0.4 Signed-off-by: Alexander Stein Fixes: 3fbae284887de ("phy: freescale: phy-fsl-imx8-mipi-dphy: Add i.MX8qxp LVDS PHY mode support") Link: https://lore.kernel.org/r/20240110093343.468810-1-alexander.stein@ew.tq-group.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b32889bfc..0928a526e2ab3 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); -- GitLab From 7eb95e0af5c9c2e6fad50356eaf32d216d0e7bc3 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Thu, 25 Jan 2024 14:30:17 -0600 Subject: [PATCH 0602/1418] powerpc/pseries/iommu: IOMMU table is not initialized for kdump over SR-IOV [ Upstream commit 09a3c1e46142199adcee372a420b024b4fc61051 ] When kdump kernel tries to copy dump data over SR-IOV, LPAR panics due to NULL pointer exception: Kernel attempted to read user page (0) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc000000020847ad4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: mlx5_core(+) vmx_crypto pseries_wdt papr_scm libnvdimm mlxfw tls psample sunrpc fuse overlay squashfs loop CPU: 12 PID: 315 Comm: systemd-udevd Not tainted 6.4.0-Test102+ #12 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c000000020847ad4 LR: c00000002083b2dc CTR: 00000000006cd18c REGS: c000000029162ca0 TRAP: 0300 Not tainted (6.4.0-Test102+) MSR: 800000000280b033 CR: 48288244 XER: 00000008 CFAR: c00000002083b2d8 DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 1 ... NIP _find_next_zero_bit+0x24/0x110 LR bitmap_find_next_zero_area_off+0x5c/0xe0 Call Trace: dev_printk_emit+0x38/0x48 (unreliable) iommu_area_alloc+0xc4/0x180 iommu_range_alloc+0x1e8/0x580 iommu_alloc+0x60/0x130 iommu_alloc_coherent+0x158/0x2b0 dma_iommu_alloc_coherent+0x3c/0x50 dma_alloc_attrs+0x170/0x1f0 mlx5_cmd_init+0xc0/0x760 [mlx5_core] mlx5_function_setup+0xf0/0x510 [mlx5_core] mlx5_init_one+0x84/0x210 [mlx5_core] probe_one+0x118/0x2c0 [mlx5_core] local_pci_probe+0x68/0x110 pci_call_probe+0x68/0x200 pci_device_probe+0xbc/0x1a0 really_probe+0x104/0x540 __driver_probe_device+0xb4/0x230 driver_probe_device+0x54/0x130 __driver_attach+0x158/0x2b0 bus_for_each_dev+0xa8/0x130 driver_attach+0x34/0x50 bus_add_driver+0x16c/0x300 driver_register+0xa4/0x1b0 __pci_register_driver+0x68/0x80 mlx5_init+0xb8/0x100 [mlx5_core] do_one_initcall+0x60/0x300 do_init_module+0x7c/0x2b0 At the time of LPAR dump, before kexec hands over control to kdump kernel, DDWs (Dynamic DMA Windows) are scanned and added to the FDT. For the SR-IOV case, default DMA window "ibm,dma-window" is removed from the FDT and DDW added, for the device. Now, kexec hands over control to the kdump kernel. When the kdump kernel initializes, PCI busses are scanned and IOMMU group/tables created, in pci_dma_bus_setup_pSeriesLP(). For the SR-IOV case, there is no "ibm,dma-window". The original commit: b1fc44eaa9ba, fixes the path where memory is pre-mapped (direct mapped) to the DDW. When TCEs are direct mapped, there is no need to initialize IOMMU tables. iommu_table_setparms_lpar() only considers "ibm,dma-window" property when initiallizing IOMMU table. In the scenario where TCEs are dynamically allocated for SR-IOV, newly created IOMMU table is not initialized. Later, when the device driver tries to enter TCEs for the SR-IOV device, NULL pointer execption is thrown from iommu_area_alloc(). The fix is to initialize the IOMMU table with DDW property stored in the FDT. There are 2 points to remember: 1. For the dedicated adapter, kdump kernel would encounter both default and DDW in FDT. In this case, DDW property is used to initialize the IOMMU table. 2. A DDW could be direct or dynamic mapped. kdump kernel would initialize IOMMU table and mark the existing DDW as "dynamic". This works fine since, at the time of table initialization, iommu_table_clear() makes some space in the DDW, for some predefined number of TCEs which are needed for kdump to succeed. Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window") Signed-off-by: Gaurav Batra Reviewed-by: Brian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20240125203017.61014-1-gbatra@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/iommu.c | 156 +++++++++++++++++-------- 1 file changed, 105 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 97b026130c71b..1e5f083cdb720 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table_ops iommu_table_lpar_multi_ops; -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - struct iommu_table_group *table_group, - const __be32 *dma_window) -{ - unsigned long offset, size, liobn; - - of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); - - iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, - &iommu_table_lpar_multi_ops); - - - table_group->tce32_start = offset; - table_group->tce32_size = size; -} - struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, .clear = tce_free_pSeries, @@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { * dynamic 64bit DMA window, walking up the device tree. */ static struct device_node *pci_dma_find(struct device_node *dn, - const __be32 **dma_window) + struct dynamic_dma_window_prop *prop) { - const __be32 *dw = NULL; + const __be32 *default_prop = NULL; + const __be32 *ddw_prop = NULL; + struct device_node *rdn = NULL; + bool default_win = false, ddw_win = false; for ( ; dn && PCI_DN(dn); dn = dn->parent) { - dw = of_get_property(dn, "ibm,dma-window", NULL); - if (dw) { - if (dma_window) - *dma_window = dw; - return dn; + default_prop = of_get_property(dn, "ibm,dma-window", NULL); + if (default_prop) { + rdn = dn; + default_win = true; + } + ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; } - dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); - if (dw) - return dn; - dw = of_get_property(dn, DMA64_PROPNAME, NULL); - if (dw) - return dn; + + /* At least found default window, which is the case for normal boot */ + if (default_win) + break; } - return NULL; + /* For PCI devices there will always be a DMA window, either on the device + * or parent bus + */ + WARN_ON(!(default_win | ddw_win)); + + /* caller doesn't want to get DMA window property */ + if (!prop) + return rdn; + + /* parse DMA window property. During normal system boot, only default + * DMA window is passed in OF. But, for kdump, a dedicated adapter might + * have both default and DDW in FDT. In this scenario, DDW takes precedence + * over default window. + */ + if (ddw_win) { + struct dynamic_dma_window_prop *p; + + p = (struct dynamic_dma_window_prop *)ddw_prop; + prop->liobn = p->liobn; + prop->dma_base = p->dma_base; + prop->tce_shift = p->tce_shift; + prop->window_shift = p->window_shift; + } else if (default_win) { + unsigned long offset, size, liobn; + + of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); + + prop->liobn = cpu_to_be32((u32)liobn); + prop->dma_base = cpu_to_be64(offset); + prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); + prop->window_shift = cpu_to_be32(order_base_2(size)); + } + + return rdn; } static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) @@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const __be32 *dma_window = NULL; + struct dynamic_dma_window_prop prop; dn = pci_bus_to_OF_node(bus); pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); - if (dma_window == NULL) - pr_debug(" no ibm,dma-window property !\n"); + /* In PPC architecture, there will always be DMA window on bus or one of the + * parent bus. During reboot, there will be ibm,dma-window property to + * define DMA window. For kdump, there will at least be default window or DDW + * or both. + */ ppci = PCI_DN(pdn); @@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - if (dma_window) { - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); - } + iommu_table_setparms_common(tbl, ppci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name) continue; } + /* If at the time of system initialization, there are DDWs in OF, + * it means this is during kexec. DDW could be direct or dynamic. + * We will just mark DDWs as "dynamic" since this is kdump path, + * no need to worry about perforance. ddw_list_new_entry() will + * set window->direct = false. + */ window = ddw_list_new_entry(pdn, dma64); if (!window) { of_node_put(pdn); @@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const __be32 *dma_window = NULL; struct pci_dn *pci; + struct dynamic_dma_window_prop prop; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, - pci->table_group, dma_window); + + iommu_table_setparms_common(tbl, pci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, -- GitLab From a3d369aeb332bc7a29ba1facb9a3d3d8ba8d2568 Mon Sep 17 00:00:00 2001 From: Arturas Moskvinas Date: Fri, 1 Mar 2024 09:12:04 +0200 Subject: [PATCH 0603/1418] gpio: 74x164: Enable output pins after registers are reset [ Upstream commit 530b1dbd97846b110ea8a94c7cc903eca21786e5 ] Chip outputs are enabled[1] before actual reset is performed[2] which might cause pin output value to flip flop if previous pin value was set to 1. Fix that behavior by making sure chip is fully reset before all outputs are enabled. Flip-flop can be noticed when module is removed and inserted again and one of the pins was changed to 1 before removal. 100 microsecond flipping is noticeable on oscilloscope (100khz SPI bus). For a properly reset chip - output is enabled around 100 microseconds (on 100khz SPI bus) later during probing process hence should be irrelevant behavioral change. Fixes: 7ebc194d0fd4 (gpio: 74x164: Introduce 'enable-gpios' property) Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L130 [1] Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L150 [2] Signed-off-by: Arturas Moskvinas Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-74x164.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c333105170..753e7be039e4d 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; -- GitLab From 17acece41de3dafb63018fecbf54d288366901eb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Feb 2024 21:28:46 +0200 Subject: [PATCH 0604/1418] gpiolib: Fix the error path order in gpiochip_add_data_with_key() [ Upstream commit e4aec4daa8c009057b5e063db1b7322252c92dc8 ] After shuffling the code, error path wasn't updated correctly. Fix it here. Fixes: 2f4133bb5f14 ("gpiolib: No need to call gpiochip_remove_pin_ranges() twice") Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6d3e3454a6ed6..f646df7f1b41a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -815,11 +815,11 @@ err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); err_remove_acpi_chip: acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); err_remove_of_chip: gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ -- GitLab From c6ff5fb6b157cf4101889c1f3e169eb6897e8f50 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 29 Feb 2024 18:25:49 +0100 Subject: [PATCH 0605/1418] gpio: fix resource unwinding order in error path [ Upstream commit ec5c54a9d3c4f9c15e647b049fea401ee5258696 ] Hogs are added *after* ACPI so should be removed *before* in error path. Fixes: a411e81e61df ("gpiolib: add hogs support for machine code") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f646df7f1b41a..9d8c783124033 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -813,11 +813,11 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: +err_free_hogs: + gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); err_remove_of_chip: - gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: gpiochip_free_valid_mask(gc); -- GitLab From 0e351d1aa2e4c1a7a4cb2a5753b86db89796d3c8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 25 Feb 2024 11:01:41 +0800 Subject: [PATCH 0606/1418] block: define bvec_iter as __packed __aligned(4) [ Upstream commit 7838b4656110d950afdd92a081cc0f33e23e0ea8 ] In commit 19416123ab3e ("block: define 'struct bvec_iter' as packed"), what we need is to save the 4byte padding, and avoid `bio` to spread on one extra cache line. It is enough to define it as '__packed __aligned(4)', as '__packed' alone means byte aligned, and can cause compiler to generate horrible code on architectures that don't support unaligned access in case that bvec_iter is embedded in other structures. Cc: Mikulas Patocka Suggested-by: Linus Torvalds Fixes: 19416123ab3e ("block: define 'struct bvec_iter' as packed") Signed-off-by: Ming Lei Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/bvec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 9e3dac51eb26b..d4dbaae8b5218 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -59,7 +59,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; -- GitLab From 19ec82b3cad1abef2a929262b8c1528f4e0c192d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 14:10:12 +0100 Subject: [PATCH 0607/1418] Revert "interconnect: Fix locking for runpm vs reclaim" This reverts commit ee42bfc791aa3cd78e29046f26a09d189beb3efb which is commit af42269c3523492d71ebbe11fefae2653e9cdc78 upstream. It is reported to cause boot crashes in Android systems, so revert it from the stable trees for now. Cc: Rob Clark Cc: Georgi Djakov Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1d9494f64a215..87f380e0e982e 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers); static int providers_count; static bool synced_state; static DEFINE_MUTEX(icc_lock); -static DEFINE_MUTEX(icc_bw_lock); static struct dentry *icc_debugfs_dir; static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) @@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) if (WARN_ON(IS_ERR(path) || !path->num_nodes)) return -EINVAL; - mutex_lock(&icc_bw_lock); + mutex_lock(&icc_lock); old_avg = path->reqs[0].avg_bw; old_peak = path->reqs[0].peak_bw; @@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) apply_constraints(path); } - mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); trace_icc_set_bw_end(path, ret); @@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); node->provider = provider; list_add_tail(&node->node_list, &provider->nodes); @@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = 0; node->peak_bw = 0; - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_node_add); @@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); synced_state = true; list_for_each_entry(p, &icc_providers, provider_list) { dev_dbg(p->dev, "interconnect provider is in synced state\n"); -- GitLab From 559035e04e442a0c7fd58d5fe00308b0d99e2318 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 14:12:15 +0100 Subject: [PATCH 0608/1418] Revert "interconnect: Teach lockdep about icc_bw_lock order" This reverts commit 0db211ec0f1d32b93486e8f6565249ad4d1bece5 which is commit 13619170303878e1dae86d9a58b039475c957fcf upstream. It is reported to cause boot crashes in Android systems, so revert it from the stable trees for now. Cc: Rob Clark Cc: Georgi Djakov Cc: Guenter Roeck Cc: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 87f380e0e982e..4526ff2e1bd5f 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1146,21 +1146,13 @@ void icc_sync_state(struct device *dev) } } } - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_sync_state); static int __init icc_init(void) { - struct device_node *root; - - /* Teach lockdep about lock ordering wrt. shrinker: */ - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&icc_bw_lock); - fs_reclaim_release(GFP_KERNEL); - - root = of_find_node_by_path("/"); + struct device_node *root = of_find_node_by_path("/"); providers_count = of_count_icc_providers(root); of_node_put(root); -- GitLab From 29d3e02fb448b50ffd5d83156de9680daf16f47a Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:23:53 -0800 Subject: [PATCH 0609/1418] x86/bugs: Add asm helpers for executing VERW commit baf8361e54550a48a7087b603313ad013cc13386 upstream. MDS mitigation requires clearing the CPU buffers before returning to user. This needs to be done late in the exit-to-user path. Current location of VERW leaves a possibility of kernel data ending up in CPU buffers for memory accesses done after VERW such as: 1. Kernel data accessed by an NMI between VERW and return-to-user can remain in CPU buffers since NMI returning to kernel does not execute VERW to clear CPU buffers. 2. Alyssa reported that after VERW is executed, CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system call. Memory accesses during stack scrubbing can move kernel stack contents into CPU buffers. 3. When caller saved registers are restored after a return from function executing VERW, the kernel stack accesses can remain in CPU buffers(since they occur after VERW). To fix this VERW needs to be moved very late in exit-to-user path. In preparation for moving VERW to entry/exit asm code, create macros that can be used in asm. Also make VERW patching depend on a new feature flag X86_FEATURE_CLEAR_CPU_BUF. [pawan: - Runtime patch jmp instead of verw in macro CLEAR_CPU_BUFFERS due to lack of relative addressing support for relocations in kernels < v6.5. - Add UNWIND_HINT_EMPTY to avoid warning: arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction] Reported-by: Alyssa Milburn Suggested-by: Andrew Cooper Suggested-by: Peter Zijlstra Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry.S | 23 +++++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb362bcf..09e99d13fc0b3 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b122708792c4d..b60f24b30cb90 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -304,7 +304,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ - +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d3706de91a934..2c66b2081f877 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,19 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_\@: +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -375,6 +388,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** -- GitLab From 22444d079b4ccc608b9bac3e591cd88629c73df7 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:23:59 -0800 Subject: [PATCH 0610/1418] x86/entry_64: Add VERW just before userspace transition commit 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a upstream. Mitigation for MDS is to use VERW instruction to clear any secrets in CPU Buffers. Any memory accesses after VERW execution can still remain in CPU buffers. It is safer to execute VERW late in return to user path to minimize the window in which kernel data can end up in CPU buffers. There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. Add support for deploying VERW mitigation after user register state is restored. This helps minimize the chances of kernel data ending up into CPU buffers after executing VERW. Note that the mitigation at the new location is not yet enabled. Corner case not handled ======================= Interrupts returning to kernel don't clear CPUs buffers since the exit-to-user path is expected to do that anyways. But, there could be a case when an NMI is generated in kernel after the exit-to-user path has cleared the buffers. This case is not handled and NMI returning to kernel don't clear CPU buffers because: 1. It is rare to get an NMI after VERW, but before returning to user. 2. For an unprivileged user, there is no known way to make that NMI less rare or target it. 3. It would take a large number of these precisely-timed NMIs to mount an actual attack. There's presumably not enough bandwidth. 4. The NMI in question occurs after a VERW, i.e. when user state is restored and most interesting data is already scrubbed. Whats left is only the data that NMI touches, and that may or may not be of any interest. [ pawan: resolved conflict for hunk swapgs_restore_regs_and_return_to_usermode in backport ] Suggested-by: Dave Hansen Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/entry/entry_64_compat.S | 1 + 2 files changed, 12 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9953d966d1244..c2383c2880ec6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -223,6 +223,7 @@ syscall_return_via_sysret: SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR swapgs + CLEAR_CPU_BUFFERS sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi swapgs + CLEAR_CPU_BUFFERS jmp .Lnative_iret @@ -767,6 +769,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1493,6 +1497,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY ENDBR mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d6c08d8986b17..4bcd009a232bf 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR -- GitLab From 2e3087505ddb8ba2d3d4c81306cca11e868fcdb9 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:05 -0800 Subject: [PATCH 0611/1418] x86/entry_32: Add VERW just before userspace transition commit a0e2dab44d22b913b4c228c8b52b2a104434b0b3 upstream. As done for entry_64, add support for executing VERW late in exit to user path for 32-bit mode. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e71560389..ee5def1060c86 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -981,6 +982,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: -- GitLab From 07946d956b55703102d5eb1518888f0d0ac87e14 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:11 -0800 Subject: [PATCH 0612/1418] x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key commit 6613d82e617dd7eb8b0c40b2fe3acea655b1d611 upstream. The VERW mitigation at exit-to-user is enabled via a static branch mds_user_clear. This static branch is never toggled after boot, and can be safely replaced with an ALTERNATIVE() which is convenient to use in asm. Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user path. Also remove the now redundant VERW in exc_nmi() and arch_exit_to_user_mode(). Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 38 ++++++++++++++++++++-------- arch/x86/include/asm/entry-common.h | 1 - arch/x86/include/asm/nospec-branch.h | 12 --------- arch/x86/kernel/cpu/bugs.c | 15 +++++------ arch/x86/kernel/nmi.c | 3 --- arch/x86/kvm/vmx/vmx.c | 2 +- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be200f9..e801df0bb3a81 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 11203a9fe0a87..ffe72790ceafd 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2c66b2081f877..8f6f17a8617b6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -381,7 +381,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); @@ -415,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } -/** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 13dffc43ded02..d1895930e6eb8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -423,7 +420,7 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else static_branch_enable(&mmio_stale_data_clear); @@ -483,12 +480,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index cec0bfa3bc04f..ed6cce6c39504 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -522,9 +522,6 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 57c1374fdfd49..3b76f1bf001e5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7123,7 +7123,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) + else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) -- GitLab From edfaad334a11d4fba21cbd860ba9a61213f4bd0b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:18 -0800 Subject: [PATCH 0613/1418] KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH From: Sean Christopherson commit 706a189dcf74d3b3f955e9384785e726ed6c7c80 upstream. Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus VMLAUNCH. Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF, for MDS mitigations as late as possible without needing to duplicate VERW for both paths. [ pawan: resolved merge conflict in __vmx_vcpu_run in backport. ] Signed-off-by: Sean Christopherson Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/all/20240213-delay-verw-v8-5-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/run_flags.h | 7 +++++-- arch/x86/kvm/vmx/vmenter.S | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc1896..6a9bfdfbb6e59 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b5db4de4d09e..42c0b2c3aee10 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb $VMX_RUN_VMRESUME, %bl + bt $VMX_RUN_VMRESUME_SHIFT, %bx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -128,8 +128,8 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'testb' above */ - jz .Lvmlaunch + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" -- GitLab From da67116b74e6aa9c531de386e1d99f2e460d1cc4 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:24 -0800 Subject: [PATCH 0614/1418] KVM/VMX: Move VERW closer to VMentry for MDS mitigation commit 43fb862de8f628c5db5e96831c915b9aebf62d33 upstream. During VMentry VERW is executed to mitigate MDS. After VERW, any memory access like register push onto stack may put host data in MDS affected CPU buffers. A guest can then use MDS to sample host data. Although likelihood of secrets surviving in registers at current VERW callsite is less, but it can't be ruled out. Harden the MDS mitigation by moving the VERW mitigation late in VMentry path. Note that VERW for MMIO Stale Data mitigation is unchanged because of the complexity of per-guest conditional VERW which is not easy to handle that late in asm with no GPRs available. If the CPU is also affected by MDS, VERW is unconditionally executed late in asm regardless of guest having MMIO access. [ pawan: conflict resolved in backport ] Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Sean Christopherson Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmenter.S | 3 +++ arch/x86/kvm/vmx/vmx.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 42c0b2c3aee10..0b2cad66dee12 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -128,6 +128,9 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ jnc .Lvmlaunch diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3b76f1bf001e5..5c1590855ffcd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, { guest_state_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); -- GitLab From 5fafd8254add75d8337df44ba8536e407ffe8928 Mon Sep 17 00:00:00 2001 From: Louis DeLosSantos Date: Wed, 31 May 2023 15:38:48 -0400 Subject: [PATCH 0615/1418] bpf: Add table ID to bpf_fib_lookup BPF helper commit 8ad77e72caae22a1ddcfd0c03f2884929e93b7a4 upstream. Add ability to specify routing table ID to the `bpf_fib_lookup` BPF helper. A new field `tbid` is added to `struct bpf_fib_lookup` used as parameters to the `bpf_fib_lookup` BPF helper. When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and `BPF_FIB_LOOKUP_TBID` flags the `tbid` field in `struct bpf_fib_lookup` will be used as the table ID for the fib lookup. If the `tbid` does not exist the fib lookup will fail with `BPF_FIB_LKUP_RET_NOT_FWDED`. The `tbid` field becomes a union over the vlan related output fields in `struct bpf_fib_lookup` and will be zeroed immediately after usage. This functionality is useful in containerized environments. For instance, if a CNI wants to dictate the next-hop for traffic leaving a container it can create a container-specific routing table and perform a fib lookup against this table in a "host-net-namespace-side" TC program. This functionality also allows `ip rule` like functionality at the TC layer, allowing an eBPF program to pick a routing table based on some aspect of the sk_buff. As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN datapath. When egress traffic leaves a Pod an eBPF program attached by Cilium will determine which VRF the egress traffic should target, and then perform a FIB lookup in a specific table representing this VRF's FIB. Signed-off-by: Louis DeLosSantos Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230505-bpf-add-tbid-fib-lookup-v2-1-0a31c22c748c@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- net/core/filter.c | 14 +++++++++++++- tools/include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 201dc77ebbd77..02cf4d9d8eab5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6687,6 +6691,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6747,9 +6752,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/net/core/filter.c b/net/core/filter.c index 3a6110ea4009f..085d211085658 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5885,6 +5891,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib6_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5957,7 +5969,7 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 201dc77ebbd77..02cf4d9d8eab5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6687,6 +6691,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6747,9 +6752,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; -- GitLab From 2d7ebcb5d878b4311db56eeaf7bdd76dbe9b9a13 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Sat, 7 Oct 2023 10:14:14 +0200 Subject: [PATCH 0616/1418] bpf: Derive source IP addr via bpf_*_fib_lookup() commit dab4e1f06cabb6834de14264394ccab197007302 upstream. Extend the bpf_fib_lookup() helper by making it to return the source IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set. For example, the following snippet can be used to derive the desired source IP address: struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr }; ret = bpf_skb_fib_lookup(skb, p, sizeof(p), BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH); if (ret != BPF_FIB_LKUP_RET_SUCCESS) return TC_ACT_SHOT; /* the p.ipv4_src now contains the source address */ The inability to derive the proper source address may cause malfunctions in BPF-based dataplanes for hosts containing netdevs with more than one routable IP address or for multi-homed hosts. For example, Cilium implements packet masquerading in BPF. If an egressing netdev to which the Cilium's BPF prog is attached has multiple IP addresses, then only one [hardcoded] IP address can be used for masquerading. This breaks connectivity if any other IP address should have been selected instead, for example, when a public and private addresses are attached to the same egress interface. The change was tested with Cilium [1]. Nikolay Aleksandrov helped to figure out the IPv6 addr selection. [1]: https://github.com/cilium/cilium/pull/28283 Signed-off-by: Martynas Pumputis Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6_stubs.h | 5 +++++ include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 18 +++++++++++++++++- net/ipv6/af_inet6.c | 1 + tools/include/uapi/linux/bpf.h | 10 ++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf47372..21da31e1dff5d 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 02cf4d9d8eab5..d5d2183730b9f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3121,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6692,6 +6697,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6704,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6738,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/net/core/filter.c b/net/core/filter.c index 085d211085658..cb7c4651eaec8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5809,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5951,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -5969,7 +5984,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0b42eb8c55aaf..62247621cea52 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 02cf4d9d8eab5..d5d2183730b9f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3121,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6692,6 +6697,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6704,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6738,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ -- GitLab From 8866334e35102d054160a86750b7db9203f721f9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Jan 2024 12:14:30 +0100 Subject: [PATCH 0617/1418] x86/efistub: Give up if memory attribute protocol returns an error commit a7a6a01f88e87dec4bf2365571dd2dc7403d52d0 upstream. The recently introduced EFI memory attributes protocol should be used if it exists to ensure that the memory allocation created for the kernel permits execution. This is needed for compatibility with tightened requirements related to Windows logo certification for x86 PCs. Currently, we simply strip the execute protect (XP) attribute from the entire range, but this might be rejected under some firmware security policies, and so in a subsequent patch, this will be changed to only strip XP from the executable region that runs early, and make it read-only (RO) as well. In order to catch any issues early, ensure that the memory attribute protocol works as intended, and give up if it produces spurious errors. Note that the DXE services based fallback was always based on best effort, so don't propagate any errors returned by that API. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 24 ++++++++++++++---------- drivers/firmware/efi/libstub/x86-stub.h | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index a0757a37b482b..784e1b2ae5ccd 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -void efi_adjust_memory_range_protection(unsigned long start, - unsigned long size) +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -225,13 +225,17 @@ void efi_adjust_memory_range_protection(unsigned long start, rounded_end = roundup(start + size, EFI_PAGE_SIZE); if (memattr != NULL) { - efi_call_proto(memattr, clear_memory_attributes, rounded_start, - rounded_end - rounded_start, EFI_MEMORY_XP); - return; + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_XP); + if (status != EFI_SUCCESS) + efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); + return status; } if (efi_dxe_table == NULL) - return; + return EFI_SUCCESS; /* * Don't modify memory region attributes, they are @@ -244,7 +248,7 @@ void efi_adjust_memory_range_protection(unsigned long start, status = efi_dxe_call(get_memory_space_descriptor, start, &desc); if (status != EFI_SUCCESS) - return; + break; next = desc.base_address + desc.length; @@ -269,8 +273,10 @@ void efi_adjust_memory_range_protection(unsigned long start, unprotect_start, unprotect_start + unprotect_size, status); + break; } } + return EFI_SUCCESS; } static efi_char16_t *efistub_fw_vendor(void) @@ -800,9 +806,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) *kernel_entry = addr + entry; - efi_adjust_memory_range_protection(addr, kernel_total_size); - - return EFI_SUCCESS; + return efi_adjust_memory_range_protection(addr, kernel_total_size); } static void __noreturn enter_kernel(unsigned long kernel_addr, diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h index 37c5a36b9d8cf..1c20e99a64944 100644 --- a/drivers/firmware/efi/libstub/x86-stub.h +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -5,8 +5,8 @@ extern void trampoline_32bit_src(void *, bool); extern const u16 trampoline_ljmp_imm_offset; -void efi_adjust_memory_range_protection(unsigned long start, - unsigned long size); +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); #ifdef CONFIG_X86_64 efi_status_t efi_setup_5level_paging(void); -- GitLab From 585a344af6bcac222608a158fc2830ff02712af5 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Wed, 24 Jan 2024 16:31:28 +0000 Subject: [PATCH 0618/1418] xen/events: close evtchn after mapping cleanup commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream. shutdown_pirq and startup_pirq are not taking the irq_mapping_update_lock because they can't due to lock inversion. Both are called with the irq_desc->lock being taking. The lock order, however, is first irq_mapping_update_lock and then irq_desc->lock. This opens multiple races: - shutdown_pirq can be interrupted by a function that allocates an event channel: CPU0 CPU1 shutdown_pirq { xen_evtchn_close(e) __startup_pirq { EVTCHNOP_bind_pirq -> returns just freed evtchn e set_evtchn_to_irq(e, irq) } xen_irq_info_cleanup() { set_evtchn_to_irq(e, -1) } } Assume here event channel e refers here to the same event channel number. After this race the evtchn_to_irq mapping for e is invalid (-1). - __startup_pirq races with __unbind_from_irq in a similar way. Because __startup_pirq doesn't take irq_mapping_update_lock it can grab the evtchn that __unbind_from_irq is currently freeing and cleaning up. In this case even though the event channel is allocated, its mapping can be unset in evtchn_to_irq. The fix is to first cleanup the mappings and then close the event channel. In this way, when an event channel gets allocated it's potential previous evtchn_to_irq mappings are guaranteed to be unset already. This is also the reverse order of the allocation where first the event channel is allocated and then the mappings are setup. On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal [un]bind interfaces"), we hit a BUG like the following during probing of NVMe devices. The issue is that during nvme_setup_io_queues, pci_free_irq is called for every device which results in a call to shutdown_pirq. With many nvme devices it's therefore likely to hit this race during boot because there will be multiple calls to shutdown_pirq and startup_pirq are running potentially in parallel. ------------[ cut here ]------------ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled kernel BUG at drivers/xen/events/events_base.c:499! invalid opcode: 0000 [#1] SMP PTI CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 Workqueue: nvme-reset-wq nvme_reset_work RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_trace_log_lvl+0x1c1/0x2d9 ? show_trace_log_lvl+0x1c1/0x2d9 ? set_affinity_irq+0xdc/0x1c0 ? __die_body.cold+0x8/0xd ? die+0x2b/0x50 ? do_trap+0x90/0x110 ? bind_evtchn_to_cpu+0xdf/0xf0 ? do_error_trap+0x65/0x80 ? bind_evtchn_to_cpu+0xdf/0xf0 ? exc_invalid_op+0x4e/0x70 ? bind_evtchn_to_cpu+0xdf/0xf0 ? asm_exc_invalid_op+0x12/0x20 ? bind_evtchn_to_cpu+0xdf/0xf0 ? bind_evtchn_to_cpu+0xc5/0xf0 set_affinity_irq+0xdc/0x1c0 irq_do_set_affinity+0x1d7/0x1f0 irq_setup_affinity+0xd6/0x1a0 irq_startup+0x8a/0xf0 __setup_irq+0x639/0x6d0 ? nvme_suspend+0x150/0x150 request_threaded_irq+0x10c/0x180 ? nvme_suspend+0x150/0x150 pci_request_irq+0xa8/0xf0 ? __blk_mq_free_request+0x74/0xa0 queue_request_irq+0x6f/0x80 nvme_create_queue+0x1af/0x200 nvme_create_io_queues+0xbd/0xf0 nvme_setup_io_queues+0x246/0x320 ? nvme_irq_check+0x30/0x30 nvme_reset_work+0x1c8/0x400 process_one_work+0x1b0/0x350 worker_thread+0x49/0x310 ? process_one_work+0x350/0x350 kthread+0x11b/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Modules linked in: ---[ end trace a11715de1eee1873 ]--- Fixes: d46a78b05c0e ("xen: implement pirq type event channels") Cc: stable@vger.kernel.org Co-debugged-by: Andrew Panyakin Signed-off-by: Maximilian Heyne Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de Signed-off-by: Juergen Gross Signed-off-by: Maximilian Heyne Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 00f8e349921d4..96b96516c9806 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq) unsigned int cpu = cpu_from_irq(irq); struct xenbus_device *dev; - xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; @@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq) } xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } xen_free_irq(irq); -- GitLab From 61adba85cc40287232a539e607164f273260e0fe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Mar 2024 14:45:20 +0000 Subject: [PATCH 0619/1418] Linux 6.1.81 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240304211556.993132804@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Ron Economos Tested-by: Salvatore Bonaccorso Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Mateusz Jończyk Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Yann Sionneau Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bc4adb561a7cf..e13df565a1cb6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 80 +SUBLEVEL = 81 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 850bb481890fa830f17e9e38db792931e0b147e4 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 19 Feb 2024 13:14:32 +0800 Subject: [PATCH 0620/1418] ceph: switch to corrected encoding of max_xattr_size in mdsmap [ Upstream commit 51d31149a88b5c5a8d2d33f06df93f6187a25b4c ] The addition of bal_rank_mask with encoding version 17 was merged into ceph.git in Oct 2022 and made it into v18.2.0 release normally. A few months later, the much delayed addition of max_xattr_size got merged, also with encoding version 17, placed before bal_rank_mask in the encoding -- but it didn't make v18.2.0 release. The way this ended up being resolved on the MDS side is that bal_rank_mask will continue to be encoded in version 17 while max_xattr_size is now encoded in version 18. This does mean that older kernels will misdecode version 17, but this is also true for v18.2.0 and v18.2.1 clients in userspace. The best we can do is backport this adjustment -- see ceph.git commit 78abfeaff27fee343fb664db633de5b221699a73 for details. [ idryomov: changelog ] Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/64440 Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size") Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Venky Shankar Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/mdsmap.c | 7 ++++--- include/linux/ceph/mdsmap.h | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 3fbabc98e1f70..4a089d70ebd07 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -379,10 +379,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648dc277..fcc95bff72a57 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,7 +25,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ -- GitLab From ea2a1052f23c5c113be7e46444984d96d4bf2687 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 28 Feb 2024 13:45:17 +0100 Subject: [PATCH 0621/1418] net: lan78xx: fix runtime PM count underflow on link stop [ Upstream commit 1eecc7ab82c42133b748e1895275942a054a7f67 ] Current driver has some asymmetry in the runtime PM calls. On lan78xx_open() it will call usb_autopm_get() and unconditionally usb_autopm_put(). And on lan78xx_stop() it will call only usb_autopm_put(). So far, it was working only because this driver do not activate autosuspend by default, so it was visible only by warning "Runtime PM usage count underflow!". Since, with current driver, we can't use runtime PM with active link, execute lan78xx_open()->usb_autopm_put() only in error case. Otherwise, keep ref counting high as long as interface is open. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Oleksij Rempel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 4fd4563811299..366e83ed0a973 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3137,7 +3137,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } -- GitLab From 6632e19acbdcf16603fdd632fcf20f3126d390a9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:51 +0100 Subject: [PATCH 0622/1418] ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able [ Upstream commit cbf996f52c4e658b3fb4349a869a62fd2d4c3c1c ] Currently routines that are supposed to toggle state of ring pair do not take care of associated interrupt with queue vector that these rings belong to. This causes funky issues such as dead interface due to irq misconfiguration, as per Pavel's report from Closes: tag. Add a function responsible for disabling single IRQ in EIMC register and call this as a very first thing when disabling ring pair during xsk_pool setup. For enable let's reuse ixgbe_irq_enable_queues(). Besides this, disable/enable NAPI as first/last thing when dealing with closing or opening ring pair that xsk_pool is being configured on. Reported-by: Pavel Vazharov Closes: https://lore.kernel.org/netdev/CAJEV1ijxNyPTwASJER1bcZzS9nMoZJqfR86nu_3jFFVXzZQ4NA@mail.gmail.com/ Fixes: 024aa5800f32 ("ixgbe: added Rx/Tx ring disable/enable functions") Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6dc554e810a17..086cc25730338 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2947,8 +2947,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10543,6 +10543,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10559,6 +10597,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10567,9 +10610,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10597,9 +10637,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10608,6 +10645,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** -- GitLab From 8e23edc54a5cb3d4ea881e424673c824255a3422 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:52 +0100 Subject: [PATCH 0623/1418] i40e: disable NAPI right after disabling irqs when handling xsk_pool [ Upstream commit d562b11c1eac7d73f4c778b4cbe5468f86b1f20d ] Disable NAPI before shutting down queues that this particular NAPI contains so that the order of actions in i40e_queue_pair_disable() mirrors what we do in i40e_queue_pair_enable(). Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 76455405a6d8e..d8a7fb21b7b76 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13569,9 +13569,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); -- GitLab From 5822c02707dac0a995437ef27ba1c441aaca0e1f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:53 +0100 Subject: [PATCH 0624/1418] ice: reorder disabling IRQ and NAPI in ice_qp_dis [ Upstream commit 99099c6bc75a30b76bb5d6774a0509ab6f06af05 ] ice_qp_dis() currently does things in very mixed way. Tx is stopped before disabling IRQ on related queue vector, then it takes care of disabling Rx and finally NAPI is disabled. Let us start with disabling IRQs in the first place followed by turning off NAPI. Then it is safe to handle queues. One subtle change on top of that is that even though ice_qp_ena() looks more sane, clear ICE_CFG_BUSY as the last thing there. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 41ee081eb8875..48cf24709fe32 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -171,6 +171,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -187,13 +191,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -256,11 +257,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); free_buf: kfree(qg_buf); return err; -- GitLab From 44faf8a48294fcb8c93daa7840249c2c4cd6e68e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 29 Feb 2024 14:34:44 -0500 Subject: [PATCH 0625/1418] tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51270d573a8d9dd5afdc7934de97d66c0e14b5fd ] I'm updating __assign_str() and will be removing the second parameter. To make sure that it does not break anything, I make sure that it matches the __string() field, as that is where the string is actually going to be saved in. To make sure there's nothing that breaks, I added a WARN_ON() to make sure that what was used in __string() is the same that is used in __assign_str(). In doing this change, an error was triggered as __assign_str() now expects the string passed in to be a char * value. I instead had the following warning: include/trace/events/qdisc.h: In function ‘trace_event_raw_event_qdisc_reset’: include/trace/events/qdisc.h:91:35: error: passing argument 1 of 'strcmp' from incompatible pointer type [-Werror=incompatible-pointer-types] 91 | __assign_str(dev, qdisc_dev(q)); That's because the qdisc_enqueue() and qdisc_reset() pass in qdisc_dev(q) to __assign_str() and to __string(). But that function returns a pointer to struct net_device and not a string. It appears that these events are just saving the pointer as a string and then reading it as a string as well. Use qdisc_dev(q)->name to save the device instead. Fixes: a34dac0b90552 ("net_sched: add tracepoints for qdisc_reset() and qdisc_destroy()") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/trace/events/qdisc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925cb057..1f4258308b967 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; -- GitLab From c0b22568a9d8384fd000cc49acb8f74bde40d1b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 13:11:52 +0000 Subject: [PATCH 0626/1418] geneve: make sure to pull inner header in geneve_rx() [ Upstream commit 1ca1ba465e55b9460e4e75dec9fff31e708fec74 ] syzbot triggered a bug in geneve_rx() [1] Issue is similar to the one I fixed in commit 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. [1] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in geneve_rx drivers/net/geneve.c:279 [inline] BUG: KMSAN: uninit-value in geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] geneve_rx drivers/net/geneve.c:279 [inline] geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 udp_queue_rcv_one_skb+0x1d39/0x1f20 net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x6ae/0x6e0 net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x184/0x4b0 net/ipv4/udp.c:2346 __udp4_lib_rcv+0x1c6b/0x3010 net/ipv4/udp.c:2422 udp_rcv+0x7d/0xa0 net/ipv4/udp.c:2604 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 process_backlog+0x480/0x8b0 net/core/dev.c:5976 __napi_poll+0xe3/0x980 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x8b8/0x1870 net/core/dev.c:6778 __do_softirq+0x1b7/0x7c5 kernel/softirq.c:553 do_softirq+0x9a/0xf0 kernel/softirq.c:454 __local_bh_enable_ip+0x9b/0xa0 kernel/softirq.c:381 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline] __dev_queue_xmit+0x2768/0x51c0 net/core/dev.c:4378 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x5cb/0xbc0 mm/slub.c:3903 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1296 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6394 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2783 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x70c2/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-and-tested-by: syzbot+6a1423ff3f97159aae64@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f393e454f45ca..3f8da6f0b25ce 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) -- GitLab From 0de693d68b0a18d5e256556c7c62d92cca35ad52 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Mar 2024 09:06:08 +0100 Subject: [PATCH 0627/1418] net: sparx5: Fix use after free inside sparx5_del_mact_entry [ Upstream commit 89d72d4125e94aa3c2140fedd97ce07ba9e37674 ] Based on the static analyzis of the code it looks like when an entry from the MAC table was removed, the entry was still used after being freed. More precise the vid of the mac_entry was used after calling devm_kfree on the mac_entry. The fix consists in first using the vid of the mac_entry to delete the entry from the HW and after that to free it. Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240301080608.3053468-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2a..75868b3f548ec 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); -- GitLab From 6293ff942e9ce72ae9fced8c277f078e97843dd4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jan 2024 13:51:58 -0800 Subject: [PATCH 0628/1418] ice: virtchnl: stop pretending to support RSS over AQ or registers [ Upstream commit 2652b99e43403dc464f3648483ffb38e48872fe4 ] The E800 series hardware uses the same iAVF driver as older devices, including the virtchnl negotiation scheme. This negotiation scheme includes a mechanism to determine what type of RSS should be supported, including RSS over PF virtchnl messages, RSS over firmware AdminQ messages, and RSS via direct register access. The PF driver will always prefer VIRTCHNL_VF_OFFLOAD_RSS_PF if its supported by the VF driver. However, if an older VF driver is loaded, it may request only VIRTCHNL_VF_OFFLOAD_RSS_REG or VIRTCHNL_VF_OFFLOAD_RSS_AQ. The ice driver happily agrees to support these methods. Unfortunately, the underlying hardware does not support these mechanisms. The E800 series VFs don't have the appropriate registers for RSS_REG. The mailbox queue used by VFs for VF to PF communication blocks messages which do not have the VF-to-PF opcode. Stop lying to the VF that it could support RSS over AdminQ or registers, as these interfaces do not work when the hardware is operating on an E800 series device. In practice this is unlikely to be hit by any normal user. The iAVF driver has supported RSS over PF virtchnl commands since 2016, and always defaults to using RSS_PF if possible. In principle, nothing actually stops the existing VF from attempting to access the registers or send an AQ command. However a properly coded VF will check the capability flags and will report a more useful error if it detects a case where the driver does not support the RSS offloads that it does. Fixes: 1071a8358a28 ("ice: Implement virtchnl commands for AVF support") Signed-off-by: Jacob Keller Reviewed-by: Alan Brady Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 +-------- drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c | 2 -- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 6c03ebf81ffda..4b71392f60df1 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5a82216e7d034..63e83e8b97e55 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING -- GitLab From afdd29726a6de4ba27cd15590661424c888dc596 Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Wed, 28 Feb 2024 18:54:48 +0300 Subject: [PATCH 0629/1418] net: ice: Fix potential NULL pointer dereference in ice_bridge_setlink() [ Upstream commit 06e456a05d669ca30b224b8ed962421770c1496c ] The function ice_bridge_setlink() may encounter a NULL pointer dereference if nlmsg_find_attr() returns NULL and br_spec is dereferenced subsequently in nla_for_each_nested(). To address this issue, add a check to ensure that br_spec is not NULL before proceeding with the nested attribute iteration. Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Signed-off-by: Rand Deeb Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ab46cfca4028d..3117f65253b37 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7681,6 +7681,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; -- GitLab From 63a3c1f3c9ecc654d851e7906d05334cd0c236e2 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Mon, 19 Feb 2024 10:08:43 +0100 Subject: [PATCH 0630/1418] igc: avoid returning frame twice in XDP_REDIRECT [ Upstream commit ef27f655b438bed4c83680e4f01e1cde2739854b ] When a frame can not be transmitted in XDP_REDIRECT (e.g. due to a full queue), it is necessary to free it by calling xdp_return_frame_rx_napi. However, this is the responsibility of the caller of the ndo_xdp_xmit (see for example bq_xmit_all in kernel/bpf/devmap.c) and thus calling it inside igc_xdp_xmit (which is the ndo_xdp_xmit of the igc driver) as well will lead to memory corruption. In fact, bq_xmit_all expects that it can return all frames after the last successfully transmitted one. Therefore, break for the first not transmitted frame, but do not call xdp_return_frame_rx_napi in igc_xdp_xmit. This is equally implemented in other Intel drivers such as the igb. There are two alternatives to this that were rejected: 1. Return num_frames as all the frames would have been transmitted and release them inside igc_xdp_xmit. While it might work technically, it is not what the return value is meant to represent (i.e. the number of SUCCESSFULLY transmitted packets). 2. Rework kernel/bpf/devmap.c and all drivers to support non-consecutively dropped packets. Besides being complex, it likely has a negative performance impact without a significant gain since it is anyway unlikely that the next frame can be transmitted if the previous one was dropped. The memory corruption can be reproduced with the following script which leads to a kernel panic after a few seconds. It basically generates more traffic than a i225 NIC can transmit and pushes it via XDP_REDIRECT from a virtual interface to the physical interface where frames get dropped. #!/bin/bash INTERFACE=enp4s0 INTERFACE_IDX=`cat /sys/class/net/$INTERFACE/ifindex` sudo ip link add dev veth1 type veth peer name veth2 sudo ip link set up $INTERFACE sudo ip link set up veth1 sudo ip link set up veth2 cat << EOF > redirect.bpf.c SEC("prog") int redirect(struct xdp_md *ctx) { return bpf_redirect($INTERFACE_IDX, 0); } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c redirect.bpf.c -o redirect.bpf.o sudo ip link set veth2 xdp obj redirect.bpf.o cat << EOF > pass.bpf.c SEC("prog") int pass(struct xdp_md *ctx) { return XDP_PASS; } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c pass.bpf.c -o pass.bpf.o sudo ip link set $INTERFACE xdp obj pass.bpf.o cat << EOF > trafgen.cfg { /* Ethernet Header */ 0xe8, 0x6a, 0x64, 0x41, 0xbf, 0x46, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 1, 1, # IP Src - adapt as needed 10, 0, 1, 2, # IP Dest - adapt as needed const16(6666), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } EOF sudo trafgen -i trafgen.cfg -b3000MB -o veth1 --cpp Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Signed-off-by: Florian Kauer Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4b6f882b380dc..e052f49cc08d7 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6330,7 +6330,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6346,16 +6346,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6363,7 +6362,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, -- GitLab From 394334fe2ae3b9f1e2332b873857e84cb28aac18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Mar 2024 14:48:00 +0000 Subject: [PATCH 0631/1418] net/ipv6: avoid possible UAF in ip6_route_mpath_notify() [ Upstream commit 685f7d531264599b3f167f1e94bbd22f120e5fab ] syzbot found another use-after-free in ip6_route_mpath_notify() [1] Commit f7225172f25a ("net/ipv6: prevent use after free in ip6_route_mpath_notify") was not able to fix the root cause. We need to defer the fib6_info_release() calls after ip6_route_mpath_notify(), in the cleanup phase. [1] BUG: KASAN: slab-use-after-free in rt6_fill_node+0x1460/0x1ac0 Read of size 4 at addr ffff88809a07fc64 by task syz-executor.2/23037 CPU: 0 PID: 23037 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-01035-gea7f3cfaa588 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x167/0x540 mm/kasan/report.c:488 kasan_report+0x142/0x180 mm/kasan/report.c:601 rt6_fill_node+0x1460/0x1ac0 inet6_rt_notify+0x13b/0x290 net/ipv6/route.c:6184 ip6_route_mpath_notify net/ipv6/route.c:5198 [inline] ip6_route_multipath_add net/ipv6/route.c:5404 [inline] inet6_rtm_newroute+0x1d0f/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f73dd87dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f73de6550c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f73dd9ac050 RCX: 00007f73dd87dda9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f73dd8ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f73dd9ac050 R15: 00007ffdbdeb7858 Allocated by task 23037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:372 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:389 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:3981 [inline] __kmalloc+0x22e/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] fib6_info_alloc+0x2e/0xf0 net/ipv6/ip6_fib.c:155 ip6_route_info_create+0x445/0x12b0 net/ipv6/route.c:3758 ip6_route_multipath_add net/ipv6/route.c:5298 [inline] inet6_rtm_newroute+0x744/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 Freed by task 16: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x4e/0x60 mm/kasan/generic.c:640 poison_slab_object+0xa6/0xe0 mm/kasan/common.c:241 __kasan_slab_free+0x34/0x70 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kfree+0x14a/0x380 mm/slub.c:4409 rcu_do_batch kernel/rcu/tree.c:2190 [inline] rcu_core+0xd76/0x1810 kernel/rcu/tree.c:2465 __do_softirq+0x2bb/0x942 kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xae/0x100 mm/kasan/generic.c:586 __call_rcu_common kernel/rcu/tree.c:2715 [inline] call_rcu+0x167/0xa80 kernel/rcu/tree.c:2829 fib6_info_release include/net/ip6_fib.h:341 [inline] ip6_route_multipath_add net/ipv6/route.c:5344 [inline] inet6_rtm_newroute+0x114d/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 The buggy address belongs to the object at ffff88809a07fc00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 100 bytes inside of freed 512-byte region [ffff88809a07fc00, ffff88809a07fe00) The buggy address belongs to the physical page: page:ffffea0002681f00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x9a07c head:ffffea0002681f00 order:2 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888014c41c80 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 23028, tgid 23027 (syz-executor.4), ts 2340253595219, free_ts 2339107097036 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311 __alloc_pages+0x255/0x680 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page+0x5f/0x160 mm/slub.c:2190 allocate_slab mm/slub.c:2354 [inline] new_slab+0x84/0x2f0 mm/slub.c:2407 ___slab_alloc+0xd17/0x13e0 mm/slub.c:3540 __slab_alloc mm/slub.c:3625 [inline] __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] __do_kmalloc_node mm/slub.c:3980 [inline] __kmalloc+0x2e0/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] new_dir fs/proc/proc_sysctl.c:956 [inline] get_subdir fs/proc/proc_sysctl.c:1000 [inline] sysctl_mkdir_p fs/proc/proc_sysctl.c:1295 [inline] __register_sysctl_table+0xb30/0x1440 fs/proc/proc_sysctl.c:1376 neigh_sysctl_register+0x416/0x500 net/core/neighbour.c:3859 devinet_sysctl_register+0xaf/0x1f0 net/ipv4/devinet.c:2644 inetdev_init+0x296/0x4d0 net/ipv4/devinet.c:286 inetdev_event+0x338/0x15c0 net/ipv4/devinet.c:1555 notifier_call_chain+0x18f/0x3b0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:1987 [inline] call_netdevice_notifiers net/core/dev.c:2001 [inline] register_netdevice+0x15b2/0x1a20 net/core/dev.c:10340 br_dev_newlink+0x27/0x100 net/bridge/br_netlink.c:1563 rtnl_newlink_create net/core/rtnetlink.c:3497 [inline] __rtnl_newlink net/core/rtnetlink.c:3717 [inline] rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3730 page last free pid 11583 tgid 11583 stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1140 [inline] free_unref_page_prepare+0x968/0xa90 mm/page_alloc.c:2346 free_unref_page+0x37/0x3f0 mm/page_alloc.c:2486 kasan_depopulate_vmalloc_pte+0x74/0x90 mm/kasan/shadow.c:415 apply_to_pte_range mm/memory.c:2619 [inline] apply_to_pmd_range mm/memory.c:2663 [inline] apply_to_pud_range mm/memory.c:2699 [inline] apply_to_p4d_range mm/memory.c:2735 [inline] __apply_to_page_range+0x8ec/0xe40 mm/memory.c:2769 kasan_release_vmalloc+0x9a/0xb0 mm/kasan/shadow.c:532 __purge_vmap_area_lazy+0x163f/0x1a10 mm/vmalloc.c:1770 drain_vmap_area_work+0x40/0xd0 mm/vmalloc.c:1804 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Memory state around the buggy address: ffff88809a07fb00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88809a07fb80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88809a07fc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809a07fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88809a07fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 3b1137fe7482 ("net: ipv6: Change notifications for multipath add to RTA_MULTIPATH") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240303144801.702646-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/route.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7f65dc750feb8..887599d351b8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5335,19 +5335,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5355,6 +5343,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5415,8 +5409,7 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } -- GitLab From 3420b3ff1ff489c177ea1cb7bd9fbbc4e9a0be95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 5 Mar 2024 22:31:32 +0100 Subject: [PATCH 0632/1418] cpumap: Zero-initialise xdp_rxq_info struct before running XDP program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2487007aa3b9fafbd2cb14068f49791ce1d7ede5 ] When running an XDP program that is attached to a cpumap entry, we don't initialise the xdp_rxq_info data structure being used in the xdp_buff that backs the XDP program invocation. Tobias noticed that this leads to random values being returned as the xdp_md->rx_queue_index value for XDP programs running in a cpumap. This means we're basically returning the contents of the uninitialised memory, which is bad. Fix this by zero-initialising the rxq data structure before running the XDP program. Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Reported-by: Tobias Böhm Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240305213132.11955-1-toke@redhat.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 08a8e81027289..0508937048137 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -222,7 +222,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; -- GitLab From 7faff12e828d9c643028259c0f7a2b411cd96607 Mon Sep 17 00:00:00 2001 From: "Tobias Jakobi (Compleo)" Date: Mon, 4 Mar 2024 16:41:35 +0100 Subject: [PATCH 0633/1418] net: dsa: microchip: fix register write order in ksz8_ind_write8() [ Upstream commit b7fb7729c94fb2d23c79ff44f7a2da089c92d81c ] This bug was noticed while re-implementing parts of the kernel driver in userspace using spidev. The goal was to enable some of the errata workarounds that Microchip describes in their errata sheet [1]. Both the errata sheet and the regular datasheet of e.g. the KSZ8795 imply that you need to do this for indirect register accesses: - write a 16-bit value to a control register pair (this value consists of the indirect register table, and the offset inside the table) - either read or write an 8-bit value from the data storage register (indicated by REG_IND_BYTE in the kernel) The current implementation has the order swapped. It can be proven, by reading back some indirect register with known content (the EEE register modified in ksz8_handle_global_errata() is one of these), that this implementation does not work. Private discussion with Oleksij Rempel of Pengutronix has revealed that the workaround was apparantly never tested on actual hardware. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ProductDocuments/Errata/KSZ87xx-Errata-DS80000687C.pdf Signed-off-by: Tobias Jakobi (Compleo) Reviewed-by: Oleksij Rempel Fixes: 7b6e6235b664 ("net: dsa: microchip: ksz8795: handle eee specif erratum") Link: https://lore.kernel.org/r/20240304154135.161332-1-tobias.jakobi.compleo@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/microchip/ksz8795.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index c63e082dc57dc..934600eccbaf2 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); -- GitLab From 998fd719e6d6468b930ac0c44552ea9ff8b07b80 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 5 Mar 2024 08:13:08 +0800 Subject: [PATCH 0634/1418] net/rds: fix WARNING in rds_conn_connect_if_down [ Upstream commit c055fc00c07be1f0df7375ab0036cebd1106ed38 ] If connection isn't established yet, get_mr() will fail, trigger connection after get_mr(). Fixes: 584a8279a44a ("RDS: RDMA: return appropriate error on rdma map failures") Reported-and-tested-by: syzbot+d4faee732755bba9838e@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/rdma.c | 3 +++ net/rds/send.c | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index fba82d36593ad..a4e3c5de998be 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 0c5504068e3c2..a4ba45c430d81 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1314,12 +1314,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", -- GitLab From bce83144ba7ec7dd231c13aa065bc6900efe34f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Mar 2024 13:38:15 +0100 Subject: [PATCH 0635/1418] netfilter: nft_ct: fix l3num expectations with inet pseudo family [ Upstream commit 99993789966a6eb4f1295193dc543686899892d3 ] Following is rejected but should be allowed: table inet t { ct expectation exp1 { [..] l3proto ip Valid combos are: table ip t, l3proto ip table ip6 t, l3proto ip6 table inet t, l3proto ip OR l3proto ip6 Disallow inet pseudeo family, the l3num must be a on-wire protocol known to conntrack. Retain NFPROTO_INET case to make it clear its rejected intentionally rather as oversight. Fixes: 8059918a1377 ("netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_ct.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 8df7564f0611e..2bfe3cdfbd581 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1237,14 +1237,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l3num) { case NFPROTO_IPV4: case NFPROTO_IPV6: - if (priv->l3num != ctx->family) - return -EINVAL; + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; - fallthrough; - case NFPROTO_INET: - break; + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ default: - return -EOPNOTSUPP; + return -EAFNOSUPPORT; } priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); -- GitLab From 39001e3c42000e7c2038717af0d33c32319ad591 Mon Sep 17 00:00:00 2001 From: Lena Wang Date: Tue, 5 Mar 2024 11:38:55 +0000 Subject: [PATCH 0636/1418] netfilter: nf_conntrack_h323: Add protection for bmp length out of range [ Upstream commit 767146637efc528b5e3d31297df115e85a2fd362 ] UBSAN load reports an exception of BRK#5515 SHIFT_ISSUE:Bitwise shifts that are out of bounds for their data type. vmlinux get_bitmap(b=75) + 712 vmlinux decode_seq(bs=0xFFFFFFD008037000, f=0xFFFFFFD008037018, level=134443100) + 1956 vmlinux decode_choice(base=0xFFFFFFD0080370F0, level=23843636) + 1216 vmlinux decode_seq(f=0xFFFFFFD0080371A8, level=134443500) + 812 vmlinux decode_choice(base=0xFFFFFFD008037280, level=0) + 1216 vmlinux DecodeRasMessage() + 304 vmlinux ras_help() + 684 vmlinux nf_confirm() + 188 Due to abnormal data in skb->data, the extension bitmap length exceeds 32 when decoding ras message then uses the length to make a shift operation. It will change into negative after several loop. UBSAN load could detect a negative shift as an undefined behaviour and reports exception. So we add the protection to avoid the length exceeding 32. Or else it will return out of range error and stop decoding. Fixes: 5e35941d9901 ("[NETFILTER]: Add H.323 conntrack/NAT helper") Signed-off-by: Lena Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_h323_asn1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b0018..540d97715bd23 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) -- GitLab From 6e49f3ac43e293f86dc0c630de2354a73eec914e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 6 Mar 2024 13:31:38 +0800 Subject: [PATCH 0637/1418] erofs: apply proper VMA alignment for memory mapped files on THP [ Upstream commit 4127caee89612a84adedd78c9453089138cd5afe ] There are mainly two reasons that thp_get_unmapped_area() should be used for EROFS as other filesystems: - It's needed to enable PMD mappings as a FSDAX filesystem, see commit 74d2fad1334d ("thp, dax: add thp_get_unmapped_area for pmd mappings"); - It's useful together with large folios and CONFIG_READ_ONLY_THP_FOR_FS which enable THPs for mmapped files (e.g. shared libraries) even without FSDAX. See commit 1854bc6e2420 ("mm/readahead: Align file mappings for non-DAX"). Fixes: 06252e9ce05b ("erofs: dax support for non-tailpacking regular file") Fixes: ce529cc25b18 ("erofs: enable large folios for iomap mode") Fixes: e6687b89225e ("erofs: enable large folios for fscache mode") Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240306053138.2240206-1-hsiangkao@linux.alibaba.com Signed-off-by: Sasha Levin --- fs/erofs/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index b32801d716f89..9d20e5d23ae0b 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -440,4 +440,5 @@ const struct file_operations erofs_file_fops = { .read_iter = erofs_file_read_iter, .mmap = erofs_file_mmap, .splice_read = generic_file_splice_read, + .get_unmapped_area = thp_get_unmapped_area, }; -- GitLab From dec82a8fc45c6ce494c2cb31f001a2aadb132b57 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:35 +0800 Subject: [PATCH 0638/1418] netrom: Fix a data-race around sysctl_netrom_default_path_quality [ Upstream commit 958d6145a6d9ba9e075c921aead8753fb91c9101 ] We need to protect the reader reading sysctl_netrom_default_path_quality because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd76ca5..6f709fdffc11f 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; -- GitLab From e439607291c082332e1e35baf8faf8552e6bcb4a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:36 +0800 Subject: [PATCH 0639/1418] netrom: Fix a data-race around sysctl_netrom_obsolescence_count_initialiser [ Upstream commit cfd9f4a740f772298308b2e6070d2c744fb5cf79 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6f709fdffc11f..b8ddd8048f352 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } -- GitLab From a47d68d777b41862757b7e3051f2d46d6e25f87b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:37 +0800 Subject: [PATCH 0640/1418] netrom: Fix data-races around sysctl_netrom_network_ttl_initialiser [ Upstream commit 119cae5ea3f9e35cdada8e572cc067f072fa825a ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_dev.c | 2 +- net/netrom/nr_out.c | 2 +- net/netrom/nr_subr.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22b387..2c34389c3ce6f 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f5b71..5e531394a724b 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924cff4..c3bbd5880850b 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; -- GitLab From fed835d415766a94fc0246dcebc3af4c03fe9941 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:38 +0800 Subject: [PATCH 0641/1418] netrom: Fix a data-race around sysctl_netrom_transport_timeout [ Upstream commit 60a7a152abd494ed4f69098cf0f322e6bb140612 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec5747969f964..3c6567af2ba47 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,7 +453,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = -- GitLab From d28fa5f0e6c1554e2829f73a6a276c9a49689d04 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:39 +0800 Subject: [PATCH 0642/1418] netrom: Fix a data-race around sysctl_netrom_transport_maximum_tries [ Upstream commit e799299aafed417cc1f32adccb2a0e5268b3f6d5 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3c6567af2ba47..be404ace98786 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -457,7 +457,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = msecs_to_jiffies(sysctl_netrom_transport_busy_delay); nr->idle = -- GitLab From 5deaef2bf56456c71b841e0dfde1bee2fd88c4eb Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:40 +0800 Subject: [PATCH 0643/1418] netrom: Fix a data-race around sysctl_netrom_transport_acknowledge_delay [ Upstream commit 806f462ba9029d41aadf8ec93f2f99c5305deada ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index be404ace98786..7428ea436e318 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -455,7 +455,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t1 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = -- GitLab From 5ac337138272d26d6d3d4f71bc5b1a87adf8b24d Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:41 +0800 Subject: [PATCH 0644/1418] netrom: Fix a data-race around sysctl_netrom_transport_busy_delay [ Upstream commit 43547d8699439a67b78d6bb39015113f7aa360fd ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7428ea436e318..ee6621c0d2e45 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -459,7 +459,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); nr->window = sysctl_netrom_transport_requested_window_size; -- GitLab From 46803b776d869b0c36041828a83c4f7da2dfa03b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:42 +0800 Subject: [PATCH 0645/1418] netrom: Fix a data-race around sysctl_netrom_transport_requested_window_size [ Upstream commit a2e706841488f474c06e9b33f71afc947fb3bf56 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ee6621c0d2e45..88941b66631fc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -462,7 +462,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; -- GitLab From 498f1d6da11ed6d736d655a2db14ee2d9569eecb Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:43 +0800 Subject: [PATCH 0646/1418] netrom: Fix a data-race around sysctl_netrom_transport_no_activity_timeout [ Upstream commit f99b494b40431f0ca416859f2345746199398e2b ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 88941b66631fc..5472e79cde830 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -461,7 +461,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t4 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; -- GitLab From 4c02b9ccbb11862ee39850b2b285664cd579b039 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:44 +0800 Subject: [PATCH 0647/1418] netrom: Fix a data-race around sysctl_netrom_routing_control [ Upstream commit b5dffcb8f71bdd02a4e5799985b51b12f4eeaf76 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b8ddd8048f352..89e12e6eea2ef 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ -- GitLab From cfedde3058bf976f2f292c0a236edd43afcdab57 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:45 +0800 Subject: [PATCH 0648/1418] netrom: Fix a data-race around sysctl_netrom_link_fails_count [ Upstream commit bc76645ebdd01be9b9994dac39685a3d0f6f7985 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 89e12e6eea2ef..70480869ad1c5 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } -- GitLab From 43464808669ba9d23996f0b6d875450191687caf Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:46 +0800 Subject: [PATCH 0649/1418] netrom: Fix data-races around sysctl_net_busy_read [ Upstream commit d380ce70058a4ccddc3e5f5c2063165dc07672c6 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5472e79cde830..f0879295de110 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. */ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2f084b6f69d7e..97944db6b5ac6 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; -- GitLab From 51c4435688ebedb498dbbf0732a9d5dff18d908d Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Mon, 9 Oct 2023 11:32:52 +0200 Subject: [PATCH 0650/1418] KVM: s390: add stat counter for shadow gmap events [ Upstream commit c3235e2dd6956448a562d6b1112205eeebc8ab43 ] The shadow gmap tracks memory of nested guests (guest-3). In certain scenarios, the shadow gmap needs to be rebuilt, which is a costly operation since it involves a SIE exit into guest-1 for every entry in the respective shadow level. Add kvm stat counters when new shadow structures are created at various levels. Also add a counter gmap_shadow_create when a completely fresh shadow gmap is created as well as a counter gmap_shadow_reuse when an existing gmap is being reused. Note that when several levels are shadowed at once, counters on all affected levels will be increased. Also note that not all page table levels need to be present and a ASCE can directly point to e.g. a segment table. In this case, a new segment table will always be equivalent to a new shadow gmap and hence will be counted as gmap_shadow_create and not as gmap_shadow_segment. Signed-off-by: Nico Boehr Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Signed-off-by: Janosch Frank Link: https://lore.kernel.org/r/20231009093304.2555344-2-nrb@linux.ibm.com Message-Id: <20231009093304.2555344-2-nrb@linux.ibm.com> Stable-dep-of: fe752331d4b3 ("KVM: s390: vsie: fix race during shadow creation") Signed-off-by: Sasha Levin --- arch/s390/include/asm/kvm_host.h | 7 +++++++ arch/s390/kvm/gaccess.c | 7 +++++++ arch/s390/kvm/kvm-s390.c | 9 ++++++++- arch/s390/kvm/vsie.c | 5 ++++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed152b..09abf000359f8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -777,6 +777,13 @@ struct kvm_vm_stat { u64 inject_service_signal; u64 inject_virtio; u64 aen_forward; + u64 gmap_shadow_create; + u64 gmap_shadow_reuse; + u64 gmap_shadow_r1_entry; + u64 gmap_shadow_r2_entry; + u64 gmap_shadow_r3_entry; + u64 gmap_shadow_sg_entry; + u64 gmap_shadow_pg_entry; }; struct kvm_arch_memory_slot { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d364..3beceff5f1c09 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, int *fake) { + struct kvm *kvm; struct gmap *parent; union asce asce; union vaddress vaddr; @@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, *fake = 0; *dat_protection = 0; + kvm = sg->private; parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; @@ -1341,6 +1343,7 @@ shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r1_entry++; } fallthrough; case ASCE_TYPE_REGION2: { @@ -1369,6 +1372,7 @@ shadow_r3t: rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r2_entry++; } fallthrough; case ASCE_TYPE_REGION3: { @@ -1406,6 +1410,7 @@ shadow_sgt: rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r3_entry++; } fallthrough; case ASCE_TYPE_SEGMENT: { @@ -1439,6 +1444,7 @@ shadow_pgt: rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_sg_entry++; } } /* Return the parent address of the page table */ @@ -1509,6 +1515,7 @@ shadow_page: pte.p |= dat_protection; if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); + vcpu->kvm->stat.gmap_shadow_pg_entry++; ipte_unlock(vcpu->kvm); mmap_read_unlock(sg->mm); return rc; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f604946ab2c85..348d49268a7ec 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_pfault_done), STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio), - STATS_DESC_COUNTER(VM, aen_forward) + STATS_DESC_COUNTER(VM, aen_forward), + STATS_DESC_COUNTER(VM, gmap_shadow_reuse), + STATS_DESC_COUNTER(VM, gmap_shadow_create), + STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry), }; const struct kvm_stats_header kvm_vm_stats_header = { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 740f8b56e63f9..b2dbf08a961e5 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1206,8 +1206,10 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, * we're holding has been unshadowed. If the gmap is still valid, * we can safely reuse it. */ - if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) + if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) { + vcpu->kvm->stat.gmap_shadow_reuse++; return 0; + } /* release the old shadow - if any, and mark the prefix as unmapped */ release_gmap_shadow(vsie_page); @@ -1215,6 +1217,7 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, if (IS_ERR(gmap)) return PTR_ERR(gmap); gmap->private = vcpu->kvm; + vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; } -- GitLab From 5df3b81a567eb565029563f26f374ae3803a1dfc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 20 Dec 2023 13:53:17 +0100 Subject: [PATCH 0651/1418] KVM: s390: vsie: fix race during shadow creation [ Upstream commit fe752331d4b361d43cfd0b89534b4b2176057c32 ] Right now it is possible to see gmap->private being zero in kvm_s390_vsie_gmap_notifier resulting in a crash. This is due to the fact that we add gmap->private == kvm after creation: static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { [...] gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); gmap->private = vcpu->kvm; Let children inherit the private field of the parent. Reported-by: Marc Hartmayer Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Cc: Cc: David Hildenbrand Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Signed-off-by: Christian Borntraeger Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com Signed-off-by: Sasha Levin --- arch/s390/kvm/vsie.c | 1 - arch/s390/mm/gmap.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index b2dbf08a961e5..d90c818a9ae71 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1216,7 +1216,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); - gmap->private = vcpu->kvm; vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 243f673fa6515..662cf23a1b44b 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, return ERR_PTR(-ENOMEM); new->mm = parent->mm; new->parent = gmap_get(parent); + new->private = parent->private; new->orig_asce = asce; new->edat_level = edat_level; new->initialized = false; -- GitLab From 35a0d43cee095e590982c290fde6dabef0623769 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jan 2024 10:11:30 +0100 Subject: [PATCH 0652/1418] ASoC: codecs: wcd938x: fix headphones volume controls [ Upstream commit 4d0e8bdfa4a57099dc7230952a460903f2e2f8de ] The lowest headphones volume setting does not mute so the leave the TLV mute flag unset. This is specifically needed to let the sound server use the lowest gain setting. Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR") Cc: # 6.5 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index e80be4e4fa8b4..555b74e7172d8 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -210,7 +210,7 @@ struct wcd938x_priv { }; static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); -static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); +static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); struct wcd938x_mbhc_zdet_param { -- GitLab From 66d663da8654099591a3aa78cf92d48410e930f7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 17 Jan 2024 08:41:52 +0530 Subject: [PATCH 0653/1418] drm/amd/display: Fix uninitialized variable usage in core_link_ 'read_dpcd() & write_dpcd()' functions [ Upstream commit a58371d632ebab9ea63f10893a6b6731196b6f8d ] The 'status' variable in 'core_link_read_dpcd()' & 'core_link_write_dpcd()' was uninitialized. Thus, initializing 'status' variable to 'DC_ERROR_UNEXPECTED' by default. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'. drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'. Cc: stable@vger.kernel.org Cc: Jerry Zuo Cc: Jun Lei Cc: Wayne Lin Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c index af110bf9470fa..aefca9756dbe8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c @@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); -- GitLab From 4e2f0cae0bfe60d99e733cddf98de017a482975f Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:08 +0200 Subject: [PATCH 0654/1418] nfp: flower: add goto_chain_index for ct entry [ Upstream commit 3e44d19934b92398785b3ffc2353b9eba264140e ] The chain_index has different means in pre ct entry and post ct entry. In pre ct entry, it means chain index, but in post ct entry, it means goto chain index, it is confused. chain_index and goto_chain_index may be present in one flow rule, It cannot be distinguished by one field chain_index, both chain_index and goto_chain_index are required in the follow-up patch to support multiple ct zones Another field goto_chain_index is added to record the goto chain index. If no goto action in post ct entry, goto_chain_index is 0. Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski Stable-dep-of: cefa98e806fd ("nfp: flower: add hardware offload check for post ct entry") Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 8 ++++++-- drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 7af03b45555dd..da7a47416a208 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, /* Checks that the chain_index of the filter matches the * chain_index of the GOTO action. */ - if (post_ct_entry->chain_index != pre_ct_entry->chain_index) + if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) return -EINVAL; err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); @@ -1776,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, if (IS_ERR(ct_entry)) return PTR_ERR(ct_entry); ct_entry->type = CT_TYPE_PRE_CT; - ct_entry->chain_index = ct_goto->chain_index; + ct_entry->chain_index = flow->common.chain_index; + ct_entry->goto_chain_index = ct_goto->chain_index; list_add(&ct_entry->list_node, &zt->pre_ct_list); zt->pre_ct_count++; @@ -1799,6 +1800,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, struct nfp_fl_ct_zone_entry *zt; bool wildcarded = false; struct flow_match_ct ct; + struct flow_action_entry *ct_goto; flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { @@ -1823,6 +1825,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, ct_entry->type = CT_TYPE_POST_CT; ct_entry->chain_index = flow->common.chain_index; + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; list_add(&ct_entry->list_node, &zt->post_ct_list); zt->post_ct_count++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 762c0b36e269b..9440ab776ecea 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -112,6 +112,7 @@ enum nfp_nfp_layer_name { * @cookie: Flow cookie, same as original TC flow, used as key * @list_node: Used by the list * @chain_index: Chain index of the original flow + * @goto_chain_index: goto chain index of the flow * @netdev: netdev structure. * @type: Type of pre-entry from enum ct_entry_type * @zt: Reference to the zone table this belongs to @@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry { unsigned long cookie; struct list_head list_node; u32 chain_index; + u32 goto_chain_index; enum ct_entry_type type; struct net_device *netdev; struct nfp_fl_ct_zone_entry *zt; -- GitLab From b0b89b470a863fb84360a89365ee69612d2863b5 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:08 +0200 Subject: [PATCH 0655/1418] nfp: flower: add hardware offload check for post ct entry [ Upstream commit cefa98e806fd4e2a5e2047457a11ae5f17b8f621 ] The nfp offload flow pay will not allocate a mask id when the out port is openvswitch internal port. This is because these flows are used to configure the pre_tun table and are never actually send to the firmware as an add-flow message. When a tc rule which action contains ct and the post ct entry's out port is openvswitch internal port, the merge offload flow pay with the wrong mask id of 0 will be send to the firmware. Actually, the nfp can not support hardware offload for this situation, so return EOPNOTSUPP. Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows") CC: stable@vger.kernel.org # 5.14+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index da7a47416a208..497766ecdd91d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1797,10 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; - struct flow_action_entry *ct_goto; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { -- GitLab From f0c349708290f2632d3103b687c7998945ed4dff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Jan 2024 09:58:39 +0100 Subject: [PATCH 0656/1418] readahead: avoid multiple marked readahead pages [ Upstream commit ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 ] ra_alloc_folio() marks a page that should trigger next round of async readahead. However it rounds up computed index to the order of page being allocated. This can however lead to multiple consecutive pages being marked with readahead flag. Consider situation with index == 1, mark == 1, order == 0. We insert order 0 page at index 1 and mark it. Then we bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page at index 2 is marked as well. Then we bump order to 2, index is incremented to 4, mark gets rounded to 4 so page at index 4 is marked as well. The fact that multiple pages get marked within a single readahead window confuses the readahead logic and results in readahead window being trimmed back to 1. This situation is triggered in particular when maximum readahead window size is not a power of two (in the observed case it was 768 KB) and as a result sequential read throughput suffers. Fix the problem by rounding 'mark' down instead of up. Because the index is naturally aligned to 'order', we are guaranteed 'rounded mark' == index iff 'mark' is within the page we are allocating at 'index' and thus exactly one page is marked with readahead flag as required by the readahead code and sequential read performance is restored. This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios"). The commit changed the rounding with the rationale: "... we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read." Although this is true, the fact is this was always the case with read sizes not aligned to folio boundaries and large folios in the page cache just make the situation more obvious (and frequent). Also for sequential read workloads it is better to trigger the readahead earlier rather than later. It is true that the difference in the rounding and thus earlier triggering of the readahead can result in reading more for semi-random workloads. However workloads really suffering from this seem to be rare. In particular I have verified that the workload described in commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading random 100k blocks from a file like: [reader] bs=100k rw=randread numjobs=1 size=64g runtime=60s is not impacted by the rounding change and achieves ~70MB/s in both cases. [jack@suse.cz: fix one more place where mark rounding was done as well] Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") Signed-off-by: Jan Kara Cc: Matthew Wilcox Cc: Guo Xuenan Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/readahead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index ba43428043a35..e4b772bb70e68 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -483,7 +483,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -591,7 +591,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; -- GitLab From 02e16a41e5439e447052c3f9a66489caf8bb4007 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Jan 2024 14:04:54 +0500 Subject: [PATCH 0657/1418] selftests/mm: switch to bash from sh [ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ] Running charge_reserved_hugetlb.sh generates errors if sh is set to dash: ./charge_reserved_hugetlb.sh: 9: [[: not found ./charge_reserved_hugetlb.sh: 19: [[: not found ./charge_reserved_hugetlb.sh: 27: [[: not found ./charge_reserved_hugetlb.sh: 37: [[: not found ./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected Switch to using /bin/bash instead of /bin/sh. Make the switch for write_hugetlb_memory.sh as well which is called from charge_reserved_hugetlb.sh. Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: David Laight Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index 0899019a7fcb4..e14bdd4455f2d 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index 70a02301f4c27..3d2d2eb9d6fff 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e -- GitLab From a584c7734a4dd050451fcdd65c66317e15660e81 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 19 Jan 2024 06:14:29 -0700 Subject: [PATCH 0658/1418] selftests: mm: fix map_hugetlb failure on 64K page size systems [ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ] On systems with 64k page size and 512M huge page sizes, the allocation and test succeeds but errors out at the munmap. As the comment states, munmap will failure if its not HUGEPAGE aligned. This is due to the length of the mapping being 1/2 the size of the hugepage causing the munmap to not be hugepage aligned. Fix this by making the mapping length the full hugepage if the hugepage is larger than the length of the mapping. Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com Signed-off-by: Nico Pache Cc: Donet Tom Cc: Shuah Khan Cc: Christophe Leroy Cc: Michael Ellerman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 312889edb84ab..c65c55b7a789f 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -70,10 +71,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { -- GitLab From 9158ea9395c12b5623b569916b6fba0171595542 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:36 +0200 Subject: [PATCH 0659/1418] xhci: process isoc TD properly when there was a transaction error mid TD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ] The last TRB of a isoc TD might not trigger an event if there was an error event for a TRB mid TD. This is seen on a NEC Corporation uPD720200 USB 3.0 Host After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 generate events for passed TRBs with IOC flag set if it proceeds to the next TD. This event is either a copy of the original error, or a "success" transfer event. If that event is missing then the driver and xHC host get out of sync as the driver is still expecting a transfer event for that first TD, while xHC host is already sending events for the next TD in the list. This leads to "Transfer event TRB DMA ptr not part of current TD" messages. As a solution we tag the isoc TDs that get error events mid TD. If an event doesn't match the first TD, then check if the tag is set, and event points to the next TD. In that case give back the fist TD and process the next TD normally Make sure TD status and transferred length stay valid in both cases with and without final TD completion event. Reported-by: Michał Pecio Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ Tested-by: Michał Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- drivers/usb/host/xhci.h | 1 + 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1239e06dfe411..e4441a71368e5 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2363,6 +2363,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2388,8 +2391,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2409,6 +2413,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2417,6 +2424,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2801,17 +2816,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2823,9 +2872,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1354310cb37b1..fc25a5b09710c 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1570,6 +1570,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; -- GitLab From 2e3ec80ea7ba58bbb210e83b5a0afefee7c171d3 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 25 Jan 2024 17:27:37 +0200 Subject: [PATCH 0660/1418] xhci: handle isoc Babble and Buffer Overrun events properly [ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ] xHCI 4.9 explicitly forbids assuming that the xHC has released its ownership of a multi-TRB TD when it reports an error on one of the early TRBs. Yet the driver makes such assumption and releases the TD, allowing the remaining TRBs to be freed or overwritten by new TDs. The xHC should also report completion of the final TRB due to its IOC flag being set by us, regardless of prior errors. This event cannot be recognized if the TD has already been freed earlier, resulting in "Transfer event TRB DMA ptr not part of current TD" error message. Fix this by reusing the logic for processing isoc Transaction Errors. This also handles hosts which fail to report the final completion. Fix transfer length reporting on Babble errors. They may be caused by device malfunction, no guarantee that the buffer has been filled. Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e4441a71368e5..239b5edee3268 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2381,9 +2381,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: -- GitLab From a28f4d1e0bed85943d309ac243fd1c200f8af9a2 Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Tue, 23 Jan 2024 12:52:03 +0100 Subject: [PATCH 0661/1418] drm/amdgpu: Reset IH OVERFLOW_CLEAR bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7330256268664ea0a7dd5b07a3fed363093477dd ] Allows us to detect subsequent IH ring buffer overflows as well. Cc: Joshua Ashton Cc: Alex Deucher Cc: Christian König Cc: stable@vger.kernel.org Signed-off-by: Friedrich Vock Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/cz_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/si_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 ++++++ 9 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index df385ffc97683..6578ca1b90afa 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37ad..c19681492efa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a6..2c02ae69883d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 7cd79a3844b24..657e4ca6f9dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index eec13cb5bf758..84e8e8b008ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a57502..cada9f300a7f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index b08905d1c00f0..07a5d95be07f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 1e83db0c5438d..74c94df423455 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 59dfca093155c..f1ba76c35cd6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } -- GitLab From 8b5760939db9c49c03b9e19f6c485a8812f48d83 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0662/1418] x86/mmio: Disable KVM mitigation when X86_FEATURE_CLEAR_CPU_BUF is set commit e95df4ec0c0c9791941f112db699fae794b9862a upstream. Currently MMIO Stale Data mitigation for CPUs not affected by MDS/TAA is to only deploy VERW at VMentry by enabling mmio_stale_data_clear static branch. No mitigation is needed for kernel->user transitions. If such CPUs are also affected by RFDS, its mitigation may set X86_FEATURE_CLEAR_CPU_BUF to deploy VERW at kernel->user and VMentry. This could result in duplicate VERW at VMentry. Fix this by disabling mmio_stale_data_clear static branch when X86_FEATURE_CLEAR_CPU_BUF is enabled. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d1895930e6eb8..c66f6eb40afb1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -421,6 +421,13 @@ static void __init mmio_select_mitigation(void) if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + /* + * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based + * mitigations, disable KVM-only mitigation in that case. + */ + if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + static_branch_disable(&mmio_stale_data_clear); else static_branch_enable(&mmio_stale_data_clear); @@ -497,8 +504,11 @@ static void __init md_clear_update_mitigation(void) taa_mitigation = TAA_MITIGATION_VERW; taa_select_mitigation(); } - if (mmio_mitigation == MMIO_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + /* + * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear + * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state. + */ + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } -- GitLab From 29476fac750dddeabc3503bf9b13e05b949d7adb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0663/1418] Documentation/hw-vuln: Add documentation for RFDS commit 4e42765d1be01111df0c0275bbaf1db1acef346e upstream. Add the documentation for transient execution vulnerability Register File Data Sampling (RFDS) that affects Intel Atom CPUs. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../hw-vuln/reg-file-data-sampling.rst | 104 ++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 6828102baaa7a..3e4a14e38b49e 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -21,3 +21,4 @@ are configurable at compile, boot or run time. cross-thread-rsb.rst gather_data_sampling.rst srso + reg-file-data-sampling diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst new file mode 100644 index 0000000000000..0585d02b9a6cb --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -0,0 +1,104 @@ +================================== +Register File Data Sampling (RFDS) +================================== + +Register File Data Sampling (RFDS) is a microarchitectural vulnerability that +only affects Intel Atom parts(also branded as E-cores). RFDS may allow +a malicious actor to infer data values previously used in floating point +registers, vector registers, or integer registers. RFDS does not provide the +ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS. + +Affected Processors +=================== +Below is the list of affected Intel processors [#f1]_: + + =================== ============ + Common name Family_Model + =================== ============ + ATOM_GOLDMONT 06_5CH + ATOM_GOLDMONT_D 06_5FH + ATOM_GOLDMONT_PLUS 06_7AH + ATOM_TREMONT_D 06_86H + ATOM_TREMONT 06_96H + ALDERLAKE 06_97H + ALDERLAKE_L 06_9AH + ATOM_TREMONT_L 06_9CH + RAPTORLAKE 06_B7H + RAPTORLAKE_P 06_BAH + ATOM_GRACEMONT 06_BEH + RAPTORLAKE_S 06_BFH + =================== ============ + +As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and +RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as +vulnerable in Linux because they share the same family/model with an affected +part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or +CPUID.HYBRID. This information could be used to distinguish between the +affected and unaffected parts, but it is deemed not worth adding complexity as +the reporting is fixed automatically when these parts enumerate RFDS_NO. + +Mitigation +========== +Intel released a microcode update that enables software to clear sensitive +information using the VERW instruction. Like MDS, RFDS deploys the same +mitigation strategy to force the CPU to clear the affected buffers before an +attacker can extract the secrets. This is achieved by using the otherwise +unused and obsolete VERW instruction in combination with a microcode update. +The microcode clears the affected CPU buffers when the VERW instruction is +executed. + +Mitigation points +----------------- +VERW is executed by the kernel before returning to user space, and by KVM +before VMentry. None of the affected cores support SMT, so VERW is not required +at C-state transitions. + +New bits in IA32_ARCH_CAPABILITIES +---------------------------------- +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +vulnerability and mitigation capability: + +- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS. +- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the + microcode that clears the affected buffers on VERW execution. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control RFDS mitigation at boot time with the +parameter "reg_file_data_sampling=". The valid arguments are: + + ========== ================================================================= + on If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and before entering a VM. + off Disables mitigation. + ========== ================================================================= + +Mitigation default is selected by CONFIG_MITIGATION_RFDS. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: No microcode' + - The processor is vulnerable but microcode is not updated. + * - 'Mitigation: Clear Register File' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html -- GitLab From d405b9c03f06b1b5e73ebc4f34452687022f7029 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0664/1418] x86/rfds: Mitigate Register File Data Sampling (RFDS) commit 8076fcde016c9c0e0660543e67bff86cb48a7c9c upstream. RFDS is a CPU vulnerability that may allow userspace to infer kernel stale data previously used in floating point registers, vector registers and integer registers. RFDS only affects certain Intel Atom processors. Intel released a microcode update that uses VERW instruction to clear the affected CPU buffers. Unlike MDS, none of the affected cores support SMT. Add RFDS bug infrastructure and enable the VERW based mitigation by default, that clears the affected buffers just before exiting to userspace. Also add sysfs reporting and cmdline parameter "reg_file_data_sampling" to control the mitigation. For details see: Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + .../admin-guide/kernel-parameters.txt | 21 +++++ arch/x86/Kconfig | 11 +++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 8 ++ arch/x86/kernel/cpu/bugs.c | 78 ++++++++++++++++++- arch/x86/kernel/cpu/common.c | 38 ++++++++- drivers/base/cpu.c | 8 ++ include/linux/cpu.h | 2 + 9 files changed, 162 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 13c01b641dc70..78c26280c473b 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -519,6 +519,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling /sys/devices/system/cpu/vulnerabilities/retbleed /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/spectre_v1 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4ad60e127e048..2dfe75104e7de 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1107,6 +1107,26 @@ The filter can be disabled or changed to another driver later using sysfs. + reg_file_data_sampling= + [X86] Controls mitigation for Register File Data + Sampling (RFDS) vulnerability. RFDS is a CPU + vulnerability which may allow userspace to infer + kernel data values previously stored in floating point + registers, vector registers, or integer registers. + RFDS only affects Intel Atom processors. + + on: Turns ON the mitigation. + off: Turns OFF the mitigation. + + This parameter overrides the compile time default set + by CONFIG_MITIGATION_RFDS. Mitigation cannot be + disabled when other VERW based mitigations (like MDS) + are enabled. In order to disable RFDS mitigation all + VERW based mitigations need to be disabled. + + For details see: + Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + driver_async_probe= [KNL] List of driver names to be probed asynchronously. * matches with all driver names. If * is specified, the @@ -3262,6 +3282,7 @@ nospectre_bhb [ARM64] nospectre_v1 [X86,PPC] nospectre_v2 [X86,PPC,S390,ARM64] + reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] spectre_v2_user=off [X86] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2f7af61b49b6c..5caa023e98397 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2565,6 +2565,17 @@ config GDS_FORCE_MITIGATION If in doubt, say N. +config MITIGATION_RFDS + bool "RFDS Mitigation" + depends on CPU_SUP_INTEL + default y + help + Enable mitigation for Register File Data Sampling (RFDS) by default. + RFDS is a hardware vulnerability which affects Intel Atom CPUs. It + allows unprivileged speculative access to stale data previously + stored in floating point, vector and integer registers. + See also + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b60f24b30cb90..b97a70aa4de90 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -477,4 +477,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec955ab2ff034..005e41dc7ee5a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -168,6 +168,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c66f6eb40afb1..c68789fdc123b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -479,6 +479,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) } early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Register File Data Sampling: " fmt + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; + +static const char * const rfds_strings[] = { + [RFDS_MITIGATION_OFF] = "Vulnerable", + [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", + [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", +}; + +static void __init rfds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + rfds_mitigation = RFDS_MITIGATION_OFF; + return; + } + if (rfds_mitigation == RFDS_MITIGATION_OFF) + return; + + if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; +} + +static __init int rfds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + return 0; + + if (!strcmp(str, "off")) + rfds_mitigation = RFDS_MITIGATION_OFF; + else if (!strcmp(str, "on")) + rfds_mitigation = RFDS_MITIGATION_VERW; + + return 0; +} +early_param("reg_file_data_sampling", rfds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "" fmt @@ -512,6 +563,11 @@ static void __init md_clear_update_mitigation(void) mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } + if (rfds_mitigation == RFDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_RFDS)) { + rfds_mitigation = RFDS_MITIGATION_VERW; + rfds_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); @@ -521,6 +577,8 @@ out: pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + if (boot_cpu_has_bug(X86_BUG_RFDS)) + pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } static void __init md_clear_select_mitigation(void) @@ -528,11 +586,12 @@ static void __init md_clear_select_mitigation(void) mds_select_mitigation(); taa_select_mitigation(); mmio_select_mitigation(); + rfds_select_mitigation(); /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. + * As these mitigations are inter-related and rely on VERW instruction + * to clear the microarchitural buffers, update and print their status + * after mitigation selection is done for each of these vulnerabilities. */ md_clear_update_mitigation(); } @@ -2596,6 +2655,11 @@ static ssize_t mmio_stale_data_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t rfds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) @@ -2757,6 +2821,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_RFDS: + return rfds_show_state(buf); + default: break; } @@ -2831,4 +2898,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 454cdf3418624..758938c94b41e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1248,6 +1248,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRSO BIT(5) /* CPU is affected by GDS */ #define GDS BIT(6) +/* CPU is affected by Register File Data Sampling */ +#define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1275,9 +1277,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1311,6 +1322,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap) ia32_cap & ARCH_CAP_SBDR_SSDP_NO); } +static bool __init vulnerable_to_rfds(u64 ia32_cap) +{ + /* The "immunity" bit trumps everything else: */ + if (ia32_cap & ARCH_CAP_RFDS_NO) + return false; + + /* + * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to + * indicate that mitigation is needed because guest is running on a + * vulnerable hardware or may migrate to such hardware: + */ + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + return true; + + /* Only consult the blacklist when there is no enumeration: */ + return cpu_matches(cpu_vuln_blacklist, RFDS); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1419,6 +1448,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index dab70a65377c8..31da94afe4f3d 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -589,6 +589,12 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -602,6 +608,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -617,6 +624,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_reg_file_data_sampling.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 008bfa68cfabc..4b06b1f1e267a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- GitLab From b2e92ab17e440a97c716b701ecd897eebca11ac0 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0665/1418] KVM/x86: Export RFDS_NO and RFDS_CLEAR to guests commit 2a0180129d726a4b953232175857d442651b55a0 upstream. Mitigation for RFDS requires RFDS_CLEAR capability which is enumerated by MSR_IA32_ARCH_CAPABILITIES bit 27. If the host has it set, export it to guests so that they can deploy the mitigation. RFDS_NO indicates that the system is not vulnerable to RFDS, export it to guests so that they don't deploy the mitigation unnecessarily. When the host is not affected by X86_BUG_RFDS, but has RFDS_NO=0, synthesize RFDS_NO to the guest. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7144e51668136..688bc7b72eb66 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1613,7 +1613,8 @@ static unsigned int num_msr_based_features; ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) static u64 kvm_get_arch_capabilities(void) { @@ -1650,6 +1651,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_SSB_NO; if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + data |= ARCH_CAP_RFDS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* -- GitLab From 92cdc9d71ab084788afd1fcefa315dd33deba068 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:51 +0100 Subject: [PATCH 0666/1418] selftests: mptcp: decrease BW in simult flows [ Upstream commit 5e2f3c65af47e527ccac54060cf909e3306652ff ] When running the simult_flow selftest in slow environments -- e.g. QEmu without KVM support --, the results can be unstable. This selftest checks if the aggregated bandwidth is (almost) fully used as expected. To help improving the stability while still keeping the same validation in place, the BW and the delay are reduced to lower the pressure on the CPU. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 4a417f9d51d67..06ad0510469e3 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -301,10 +301,10 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret -- GitLab From f0b6dc034e1797b0bc91558c0c2f90c8a12b5533 Mon Sep 17 00:00:00 2001 From: Ma Hanghong Date: Thu, 20 Oct 2022 11:46:56 -0400 Subject: [PATCH 0667/1418] drm/amd/display: Wrong colorimetry workaround [ Upstream commit b1a98cf89a695d36c414653634ea7ba91b6e701f ] [Why] For FreeSync HDR, native color space flag in AMD VSIF(BT.709) should be used when intepreting content and color space flag in VSC or AVI infoFrame should be ignored. However, it turned out some userspace application still use color flag in VSC or AVI infoFrame which is incorrect. [How] Transfer function is used when building the VSC and AVI infoFrame. Set colorimetry to BT.709 when all the following match: 1. Pixel format is YCbCr; 2. In FreeSync 2 HDR, color is COLOR_SPACE_2020_YCBCR; 3. Transfer function is TRANSFER_FUNC_GAMMA_22; Tested-by: Mark Broadworth Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Ma Hanghong Signed-off-by: Alex Deucher Stable-dep-of: e6a7df96facd ("drm/amd/display: Fix MST Null Ptr for RV") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++++++ drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h | 3 ++- .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 6 +++++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index da16048bf1004..bea49befdcacc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5938,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false; int mode_refresh; int preferred_refresh = 0; + enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif @@ -6071,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) stream->use_vsc_sdp_for_colorimetry = true; } - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space); + if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) + tf = TRANSFER_FUNC_GAMMA_22; + mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 66923f51037a3..e2f80cd0ca8cb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3038,6 +3038,12 @@ static void set_avi_info_frame( hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } + if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && + stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { + hdmi_info.bits.EC0_EC2 = 0; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; + } + /* TODO: un-hardcode aspect ratio */ aspect = stream->timing.aspect_ratio; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 1d8b746b02f24..edf5845f6a1f7 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -35,7 +35,8 @@ struct mod_vrr_params; void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs); + enum dc_color_space cs, + enum color_transfer_func tf); void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 27ceba9d6d658..69691058ab898 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -132,7 +132,8 @@ enum ColorimetryYCCDP { void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs) + enum dc_color_space cs, + enum color_transfer_func tf) { unsigned int vsc_packet_revision = vsc_packet_undefined; unsigned int i; @@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; else if (cs == COLOR_SPACE_2020_YCBCR) colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; + + if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) + colorimetryFormat = ColorimetryYCC_DP_ITU709; break; default: -- GitLab From 01d992088dce3945f70f49f34b0b911c5213c238 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 22 Jan 2024 13:43:46 -0500 Subject: [PATCH 0668/1418] drm/amd/display: Fix MST Null Ptr for RV [ Upstream commit e6a7df96facdcf5b1f71eb3ec26f2f9f6ad61e57 ] The change try to fix below error specific to RV platform: BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 ? _copy_to_user+0x25/0x30 ? drm_ioctl+0x296/0x4b0 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 drm_ioctl_kernel+0xcd/0x170 drm_ioctl+0x26d/0x4b0 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 amdgpu_drm_ioctl+0x4e/0x90 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] __x64_sys_ioctl+0x94/0xd0 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6c/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f4dad17f76f Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c> RSP: 002b:00007ffd9ae859f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 000055e255a55900 RCX: 00007f4dad17f76f RDX: 00007ffd9ae85a90 RSI: 00000000c03864bc RDI: 000000000000000b RBP: 00007ffd9ae85a90 R08: 0000000000000003 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c03864bc R13: 000000000000000b R14: 000055e255a7fc60 R15: 000055e255a01eb0 Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device ccm cmac algif_hash algif_skcipher af_alg joydev mousedev bnep > typec libphy k10temp ipmi_msghandler roles i2c_scmi acpi_cpufreq mac_hid nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_mas> CR2: 0000000000000008 ---[ end trace 0000000000000000 ]--- RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 With a second DP monitor connected, drm_atomic_state in dm atomic check sequence does not include the connector state for the old/existing/first DP monitor. In such case, dsc determination policy would hit a null ptr when it tries to iterate the old/existing stream that does not have a valid connector state attached to it. When that happens, dm atomic check should call drm_atomic_get_connector_state for a new connector state. Existing dm has already done that, except for RV due to it does not have official support of dsc where .num_dsc is not defined in dcn10 resource cap, that prevent from getting drm_atomic_get_connector_state called. So, skip dsc determination policy for ASICs that don't have DSC support. Cc: stable@vger.kernel.org # 6.1+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314 Reviewed-by: Wayne Lin Acked-by: Hamza Mahfooz Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bea49befdcacc..a6c6f286a5988 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10123,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); - if (ret) { - DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; - goto fail; + if (dc_resource_is_dsc_encoding_supported(dc)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; + goto fail; + } } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); -- GitLab From eba76e4808c9ad2c41ec5652a9335a8b5c03a709 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 19:25:54 +0200 Subject: [PATCH 0669/1418] getrusage: add the "signal_struct *sig" local variable [ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ] No functional changes, cleanup/preparation. Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()") Signed-off-by: Sasha Levin --- kernel/sys.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index c85e1abf7b7c7..177155ba50cd3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1779,6 +1779,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long flags; u64 tgutime, tgstime, utime, stime; unsigned long maxrss = 0; + struct signal_struct *sig = p->signal; memset((char *)r, 0, sizeof (*r)); utime = stime = 0; @@ -1786,7 +1787,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); - maxrss = p->signal->maxrss; + maxrss = sig->maxrss; goto out; } @@ -1796,15 +1797,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - r->ru_inblock = p->signal->cinblock; - r->ru_oublock = p->signal->coublock; - maxrss = p->signal->cmaxrss; + utime = sig->cutime; + stime = sig->cstime; + r->ru_nvcsw = sig->cnvcsw; + r->ru_nivcsw = sig->cnivcsw; + r->ru_minflt = sig->cmin_flt; + r->ru_majflt = sig->cmaj_flt; + r->ru_inblock = sig->cinblock; + r->ru_oublock = sig->coublock; + maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; @@ -1814,14 +1815,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) thread_group_cputime_adjusted(p, &tgutime, &tgstime); utime += tgutime; stime += tgstime; - r->ru_nvcsw += p->signal->nvcsw; - r->ru_nivcsw += p->signal->nivcsw; - r->ru_minflt += p->signal->min_flt; - r->ru_majflt += p->signal->maj_flt; - r->ru_inblock += p->signal->inblock; - r->ru_oublock += p->signal->oublock; - if (maxrss < p->signal->maxrss) - maxrss = p->signal->maxrss; + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; + r->ru_majflt += sig->maj_flt; + r->ru_inblock += sig->inblock; + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; t = p; do { accumulate_thread_rusage(t, r); -- GitLab From d9fe6ef245766d4f4d0494aafc7d5b2189b9b94c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:50 +0100 Subject: [PATCH 0670/1418] getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand() [ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ] Patch series "getrusage: use sig->stats_lock", v2. This patch (of 2): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. This is also preparation for the next patch which changes getrusage() to use stats_lock instead of siglock, thread_group_cputime() takes the same lock. With the current implementation recursive read_seqbegin_or_lock() is fine, thread_group_cputime() can't enter the slow mode if the caller holds stats_lock, yet this looks more safe and better performance-wise. Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sys.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 177155ba50cd3..2646047fe5513 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1778,17 +1778,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; - unsigned long maxrss = 0; + unsigned long maxrss; + struct mm_struct *mm; struct signal_struct *sig = p->signal; - memset((char *)r, 0, sizeof (*r)); + memset(r, 0, sizeof(*r)); utime = stime = 0; + maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = sig->maxrss; - goto out; + goto out_thread; } if (!lock_task_sighand(p, &flags)) @@ -1812,9 +1814,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) fallthrough; case RUSAGE_SELF: - thread_group_cputime_adjusted(p, &tgutime, &tgstime); - utime += tgutime; - stime += tgstime; r->ru_nvcsw += sig->nvcsw; r->ru_nivcsw += sig->nivcsw; r->ru_minflt += sig->min_flt; @@ -1834,19 +1833,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) } unlock_task_sighand(p, &flags); -out: - r->ru_utime = ns_to_kernel_old_timeval(utime); - r->ru_stime = ns_to_kernel_old_timeval(stime); + if (who == RUSAGE_CHILDREN) + goto out_children; - if (who != RUSAGE_CHILDREN) { - struct mm_struct *mm = get_task_mm(p); + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; - if (mm) { - setmax_mm_hiwater_rss(&maxrss, mm); - mmput(mm); - } +out_thread: + mm = get_task_mm(p); + if (mm) { + setmax_mm_hiwater_rss(&maxrss, mm); + mmput(mm); } + +out_children: r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ + r->ru_utime = ns_to_kernel_old_timeval(utime); + r->ru_stime = ns_to_kernel_old_timeval(stime); } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) -- GitLab From 2a304d8c922f2d13d6b95457022950350c23103b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 19:26:29 +0200 Subject: [PATCH 0671/1418] getrusage: use __for_each_thread() [ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ] do/while_each_thread should be avoided when possible. Plus this change allows to avoid lock_task_sighand(), we can use rcu and/or sig->stats_lock instead. Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()") Signed-off-by: Sasha Levin --- kernel/sys.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 2646047fe5513..04102538cf43f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1822,10 +1822,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; - t = p; - do { + __for_each_thread(sig, t) accumulate_thread_rusage(t, r); - } while_each_thread(p, t); break; default: -- GitLab From 9793a3bb531c5b747bb726d7611b81abe32f6401 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:53 +0100 Subject: [PATCH 0672/1418] getrusage: use sig->stats_lock rather than lock_task_sighand() [ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ] lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call getrusage() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change getrusage() to use sig->stats_lock, it was specifically designed for this type of use. This way it runs lockless in the likely case. TODO: - Change do_task_stat() to use sig->stats_lock too, then we can remove spin_lock_irq(siglock) in wait_task_zombie(). - Turn sig->stats_lock into seqcount_rwlock_t, this way the readers in the slow mode won't exclude each other. See https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/ - stats_lock has to disable irqs because ->siglock can be taken in irq context, it would be very nice to change __exit_signal() to avoid the siglock->stats_lock dependency. Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sys.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 04102538cf43f..d06eda1387b69 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1781,7 +1781,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long maxrss; struct mm_struct *mm; struct signal_struct *sig = p->signal; + unsigned int seq = 0; +retry: memset(r, 0, sizeof(*r)); utime = stime = 0; maxrss = 0; @@ -1793,8 +1795,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) goto out_thread; } - if (!lock_task_sighand(p, &flags)) - return; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: @@ -1822,14 +1823,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; + + rcu_read_lock(); __for_each_thread(sig, t) accumulate_thread_rusage(t, r); + rcu_read_unlock(); + break; default: BUG(); } - unlock_task_sighand(p, &flags); + + if (need_seqretry(&sig->stats_lock, seq)) { + seq = 1; + goto retry; + } + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); if (who == RUSAGE_CHILDREN) goto out_children; -- GitLab From d95ef75162f4722af4b74d847173747930962405 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 18:45:01 +0200 Subject: [PATCH 0673/1418] fs/proc: do_task_stat: use __for_each_thread() [ Upstream commit 7904e53ed5a20fc678c01d5d1b07ec486425bb6a ] do/while_each_thread should be avoided when possible. Link: https://lkml.kernel.org/r/20230909164501.GA11581@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: 7601df8031fd ("fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats") Signed-off-by: Sasha Levin --- fs/proc/array.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 1b0d78dfd20f9..bcb645627991e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -526,12 +526,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t = task; - do { + struct task_struct *t; + + __for_each_thread(sig, t) { min_flt += t->min_flt; maj_flt += t->maj_flt; gtime += task_gtime(t); - } while_each_thread(task, t); + } min_flt += sig->min_flt; maj_flt += sig->maj_flt; -- GitLab From cf4b8c39b9a0bd81c47afc7ef62914a62dd5ec4d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:33:57 +0100 Subject: [PATCH 0674/1418] fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats [ Upstream commit 7601df8031fd67310af891897ef6cc0df4209305 ] lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call do_task_stat() at the same time and the process has NR_THREADS, it will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change do_task_stat() to use sig->stats_lock to gather the statistics outside of ->siglock protected section, in the likely case this code will run lockless. Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/proc/array.c | 58 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index bcb645627991e..d210b2f8b7ed5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, int permitted; struct mm_struct *mm; unsigned long long start_time; - unsigned long cmin_flt = 0, cmaj_flt = 0; - unsigned long min_flt = 0, maj_flt = 0; - u64 cutime, cstime, utime, stime; - u64 cgtime, gtime; + unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; + u64 cutime, cstime, cgtime, utime, stime, gtime; unsigned long rsslim = 0; unsigned long flags; int exit_code = task->exit_code; + struct signal_struct *sig = task->signal; + unsigned int seq = 1; state = *get_task_state(task); vsize = eip = esp = 0; @@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = 0; - cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; - if (sig->tty) { struct pid *pgrp = tty_get_pgrp(sig->tty); tty_pgrp = pid_nr_ns(pgrp, ns); @@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, num_threads = get_nr_threads(task); collect_sigign_sigcatch(task, &sigign, &sigcatch); - cmin_flt = sig->cmin_flt; - cmaj_flt = sig->cmaj_flt; - cutime = sig->cutime; - cstime = sig->cstime; - cgtime = sig->cgtime; rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); - /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t; - - __for_each_thread(sig, t) { - min_flt += t->min_flt; - maj_flt += t->maj_flt; - gtime += task_gtime(t); - } - - min_flt += sig->min_flt; - maj_flt += sig->maj_flt; - gtime += sig->gtime; - if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) exit_code = sig->group_exit_code; } @@ -552,6 +530,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); + do { + seq++; /* 2 on the 1st/lockless path, otherwise odd */ + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; + cgtime = sig->cgtime; + + if (whole) { + struct task_struct *t; + + min_flt = sig->min_flt; + maj_flt = sig->maj_flt; + gtime = sig->gtime; + + rcu_read_lock(); + __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); + } + rcu_read_unlock(); + } + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + if (whole) { thread_group_cputime_adjusted(task, &utime, &stime); } else { -- GitLab From d7543167affd372819a94879b8b1e8b9b12547d9 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 13 Mar 2024 07:42:17 -0400 Subject: [PATCH 0675/1418] Linux 6.1.82 Tested-by: Linux Kernel Functional Testing Tested-by: Florian Fainelli Tested-by: Mark Brown Tested-by: Pavel Machek (CIP) Tested-by: kernelci.org bot Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e13df565a1cb6..c5345f3ebed0d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 7512a70376f584589ada6363a8918fadd90189ed Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 12 May 2023 09:56:07 +0800 Subject: [PATCH 0676/1418] md: fix data corruption for raid456 when reshape restart while grow up [ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ] Currently, if reshape is interrupted, echo "reshape" to sync_action will restart reshape from scratch, for example: echo frozen > sync_action echo reshape > sync_action This will corrupt data before reshape_position if the array is growing, fix the problem by continue reshape from reshape_position. Reported-by: Peter Neuwirth Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 846bdee4daa0e..1c87f3e708094 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4903,11 +4903,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) return -EINVAL; err = mddev_lock(mddev); if (!err) { - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { err = -EBUSY; - else { + } else if (mddev->reshape_position == MaxSector || + mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev)) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); + } else { + /* + * If reshape is still in progress, and + * md_check_recovery() can continue to reshape, + * don't restart reshape because data can be + * corrupted for raid456. + */ + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } mddev_unlock(mddev); } -- GitLab From 1d467e10507167eb6dc2c281a87675b731955d86 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 29 May 2023 21:11:00 +0800 Subject: [PATCH 0677/1418] md/raid10: prevent soft lockup while flush writes [ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ] Currently, there is no limit for raid1/raid10 plugged bio. While flushing writes, raid1 has cond_resched() while raid10 doesn't, and too many writes can cause soft lockup. Follow up soft lockup can be triggered easily with writeback test for raid10 with ramdisks: watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293] Call Trace: call_rcu+0x16/0x20 put_object+0x41/0x80 __delete_object+0x50/0x90 delete_object_full+0x2b/0x40 kmemleak_free+0x46/0xa0 slab_free_freelist_hook.constprop.0+0xed/0x1a0 kmem_cache_free+0xfd/0x300 mempool_free_slab+0x1f/0x30 mempool_free+0x3a/0x100 bio_free+0x59/0x80 bio_put+0xcf/0x2c0 free_r10bio+0xbf/0xf0 raid_end_bio_io+0x78/0xb0 one_write_done+0x8a/0xa0 raid10_end_write_request+0x1b4/0x430 bio_endio+0x175/0x320 brd_submit_bio+0x3b9/0x9b7 [brd] __submit_bio+0x69/0xe0 submit_bio_noacct_nocheck+0x1e6/0x5a0 submit_bio_noacct+0x38c/0x7e0 flush_pending_writes+0xf0/0x240 raid10d+0xac/0x1ed0 Fix the problem by adding cond_resched() to raid10 like what raid1 did. Note that unlimited plugged bio still need to be optimized, for example, in the case of lots of dirty pages writeback, this will take lots of memory and io will spend a long time in plug, hence io latency is bad. Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/raid10.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7b318e7e8d459..009f7ffe4e10c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -920,6 +920,7 @@ static void flush_pending_writes(struct r10conf *conf) raid1_submit_write(bio); bio = next; + cond_resched(); } blk_finish_plug(&plug); } else @@ -1130,6 +1131,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) raid1_submit_write(bio); bio = next; + cond_resched(); } kfree(plug); } -- GitLab From 28fe81bcd3ea932e280f04e087ff0c75a4995a46 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Mar 2024 18:10:12 -0600 Subject: [PATCH 0678/1418] io_uring/unix: drop usage of io_uring socket Commit a4104821ad651d8a0b374f0b2474c345bbb42f82 upstream. Since we no longer allow sending io_uring fds over SCM_RIGHTS, move to using io_is_uring_fops() to detect whether this is a io_uring fd or not. With that done, kill off io_uring_get_socket() as nobody calls it anymore. This is in preparation to yanking out the rest of the core related to unix gc with io_uring. Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/io_uring.h | 10 +++++----- io_uring/io_uring.c | 13 ------------- io_uring/io_uring.h | 1 - net/core/scm.c | 2 +- net/unix/scm.c | 4 +--- 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index a1484cdb3158e..a8f3058448eaa 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -42,11 +42,11 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)); -struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); +bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) { @@ -71,6 +71,10 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, { return -EOPNOTSUPP; } +static inline bool io_is_uring_fops(struct file *file) +{ + return false; +} static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t ret2, unsigned issue_flags) { @@ -79,10 +83,6 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} static inline void io_uring_task_cancel(void) { } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 35894955b4549..cf7dd62da0e37 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -153,19 +153,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx); static struct kmem_cache *req_cachep; -struct sock *io_uring_get_socket(struct file *file) -{ -#if defined(CONFIG_UNIX) - if (io_is_uring_fops(file)) { - struct io_ring_ctx *ctx = file->private_data; - - return ctx->ring_sock->sk; - } -#endif - return NULL; -} -EXPORT_SYMBOL(io_uring_get_socket); - static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs)) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 019600570ee49..59e6f755f12c6 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) } void __io_req_task_work_add(struct io_kiocb *req, bool allow_local); -bool io_is_uring_fops(struct file *file); bool io_alloc_async_data(struct io_kiocb *req); void io_req_task_queue(struct io_kiocb *req); void io_queue_iowq(struct io_kiocb *req, bool *dont_use); diff --git a/net/core/scm.c b/net/core/scm.c index e762a4b8a1d22..a877c4ef4c256 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; /* don't allow io_uring files */ - if (io_uring_get_socket(file)) { + if (io_is_uring_fops(file)) { fput(file); return -EINVAL; } diff --git a/net/unix/scm.c b/net/unix/scm.c index e8e2a00bb0f58..d1048b4c2baaf 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp) /* PF_UNIX ? */ if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; - } else { - /* Could be an io_uring instance */ - u_sock = io_uring_get_socket(filp); } + return u_sock; } EXPORT_SYMBOL(unix_get_socket); -- GitLab From a3812a47a32022ca76bf46ddacdd823dc2aabf8b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Mar 2024 18:15:05 -0600 Subject: [PATCH 0679/1418] io_uring: drop any code related to SCM_RIGHTS Commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream. This is dead code after we dropped support for passing io_uring fds over SCM_RIGHTS, get rid of it. Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/io_uring_types.h | 3 - io_uring/filetable.c | 10 +-- io_uring/io_uring.c | 31 +------ io_uring/rsrc.c | 151 +-------------------------------- io_uring/rsrc.h | 15 ---- 5 files changed, 8 insertions(+), 202 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f5b687a787a34..37aeea266ebb3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -330,9 +330,6 @@ struct io_ring_ctx { struct list_head io_buffers_pages; - #if defined(CONFIG_UNIX) - struct socket *ring_sock; - #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; diff --git a/io_uring/filetable.c b/io_uring/filetable.c index b80614e7d6051..4660cb89ea9f5 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, needs_switch = true; } - ret = io_scm_file_account(ctx, file); - if (!ret) { - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - } + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + return 0; err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cf7dd62da0e37..415248c1f82c6 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -2628,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - ctx->ring_sock->file = NULL; /* so that iput() is called */ - sock_release(ctx->ring_sock); - } -#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { @@ -3438,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, - * we have to tie this fd to a socket for file garbage collection purposes. + * fd to gain access to the SQ/CQ ring details. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - struct file *file; -#if defined(CONFIG_UNIX) - int ret; - - ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, - &ctx->ring_sock); - if (ret) - return ERR_PTR(ret); -#endif - - file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); -#if defined(CONFIG_UNIX) - if (IS_ERR(file)) { - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; - } else { - ctx->ring_sock->file = file; - } -#endif - return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7ada0339b3870..ac658cfa89c63 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - err = io_scm_file_account(ctx, file); - if (err) { - fput(file); - break; - } *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); @@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < ctx->nr_user_files; i++) { struct file *file = io_file_from_index(&ctx->file_table, i); - /* skip scm accounted files, they'll be freed by ->ring_sock */ - if (!file || io_file_need_scm(file)) + if (!file) continue; io_file_bitmap_clear(&ctx->file_table, i); fput(file); } -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) - kfree_skb(skb); - } -#endif io_free_file_tables(&ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); @@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) return ret; } -/* - * Ensure the UNIX gc is aware of our file set, so we are certain that - * the io_uring can be safely unregistered on process exit, even if we have - * loops in the file referencing. We account only files that can hold other - * files because otherwise they can't form a loop and so are not interesting - * for GC. - */ -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) -{ -#if defined(CONFIG_UNIX) - struct sock *sk = ctx->ring_sock->sk; - struct sk_buff_head *head = &sk->sk_receive_queue; - struct scm_fp_list *fpl; - struct sk_buff *skb; - - if (likely(!io_file_need_scm(file))) - return 0; - - /* - * See if we can merge this file into an existing skb SCM_RIGHTS - * file set. If there's no room, fall back to allocating a new skb - * and filling it in. - */ - spin_lock_irq(&head->lock); - skb = skb_peek(head); - if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) - __skb_unlink(skb, head); - else - skb = NULL; - spin_unlock_irq(&head->lock); - - if (!skb) { - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); - if (!fpl) - return -ENOMEM; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) { - kfree(fpl); - return -ENOMEM; - } - - fpl->user = get_uid(current_user()); - fpl->max = SCM_MAX_FD; - fpl->count = 0; - - UNIXCB(skb).fp = fpl; - skb->sk = sk; - skb->scm_io_uring = 1; - skb->destructor = unix_destruct_scm; - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - } - - fpl = UNIXCB(skb).fp; - fpl->fp[fpl->count++] = get_file(file); - unix_inflight(fpl->user, file); - skb_queue_head(head, skb); - fput(file); -#endif - return 0; -} - static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { struct file *file = prsrc->file; -#if defined(CONFIG_UNIX) - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff_head list, *head = &sock->sk_receive_queue; - struct sk_buff *skb; - int i; - - if (!io_file_need_scm(file)) { - fput(file); - return; - } - - __skb_queue_head_init(&list); - - /* - * Find the skb that holds this file in its SCM_RIGHTS. When found, - * remove this entry and rearrange the file array. - */ - skb = skb_dequeue(head); - while (skb) { - struct scm_fp_list *fp; - fp = UNIXCB(skb).fp; - for (i = 0; i < fp->count; i++) { - int left; - - if (fp->fp[i] != file) - continue; - - unix_notinflight(fp->user, fp->fp[i]); - left = fp->count - 1 - i; - if (left) { - memmove(&fp->fp[i], &fp->fp[i + 1], - left * sizeof(struct file *)); - } - fp->count--; - if (!fp->count) { - kfree_skb(skb); - skb = NULL; - } else { - __skb_queue_tail(&list, skb); - } - fput(file); - file = NULL; - break; - } - - if (!file) - break; - - __skb_queue_tail(&list, skb); - - skb = skb_dequeue(head); - } - - if (skb_peek(&list)) { - spin_lock_irq(&head->lock); - while ((skb = __skb_dequeue(&list)) != NULL) - __skb_queue_tail(head, skb); - spin_unlock_irq(&head->lock); - } -#else fput(file); -#endif } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; /* - * Don't allow io_uring instances to be registered. If UNIX - * isn't enabled, then this causes a reference cycle and this - * instance can never get freed. If UNIX is enabled we'll - * handle it just fine, but there's still no point in allowing - * a ring fd as it doesn't support regular read/write anyway. + * Don't allow io_uring instances to be registered. */ if (io_is_uring_fops(file)) { fput(file); goto fail; } - ret = io_scm_file_account(ctx, file); - if (ret) { - fput(file); - goto fail; - } file_slot = io_fixed_file_slot(&ctx->file_table, i); io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index acaf8dad05401..85f145607c620 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags); -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); - -static inline bool io_file_need_scm(struct file *filp) -{ - return false; -} - -static inline int io_scm_file_account(struct io_ring_ctx *ctx, - struct file *file) -{ - if (likely(!io_file_need_scm(file))) - return 0; - return __io_scm_file_account(ctx, file); -} - int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args); int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, -- GitLab From 7762c2d4cc0b6a1c4682e7fd01f0586f028aeba4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jan 2023 10:33:47 -0500 Subject: [PATCH 0680/1418] nfsd: allow nfsd_file_get to sanely handle a NULL pointer [ Upstream commit 70f62231cdfd52357836733dd31db787e0412ab2 ] ...and remove some now-useless NULL pointer checks in its callers. Suggested-by: NeilBrown Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 5 ++--- fs/nfsd/nfs4state.c | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 697acf5c3c681..6e8712bd7c998 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -431,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf) struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(refcount_inc_not_zero(&nf->nf_ref))) + if (nf && refcount_inc_not_zero(&nf->nf_ref)) return nf; return NULL; } @@ -1086,8 +1086,7 @@ retry: rcu_read_lock(); nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); - if (nf) - nf = nfsd_file_get(nf); + nf = nfsd_file_get(nf); rcu_read_unlock(); if (nf) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b9d694ec25d19..e4522e86e984e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi) static struct nfsd_file * __nfs4_get_fd(struct nfs4_file *f, int oflag) { - if (f->fi_fds[oflag]) - return nfsd_file_get(f->fi_fds[oflag]); - return NULL; + return nfsd_file_get(f->fi_fds[oflag]); } static struct nfsd_file * -- GitLab From 19d22c5ba5c9c7a7132e2d75d6fc0b5afaa6dd1e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 07:15:09 -0500 Subject: [PATCH 0681/1418] nfsd: don't open-code clear_and_wake_up_bit [ Upstream commit b8bea9f6cdd7236c7c2238d022145e9b2f8aac22 ] Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6e8712bd7c998..5b5d39ec7b010 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1174,9 +1174,7 @@ open_file: status = nfserr_jukebox; if (status != nfs_ok) nfsd_file_unhash(nf); - clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); - smp_mb__after_atomic(); - wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); goto out; } -- GitLab From fab03e0db0c2e8bcb651ea42634f5449cedc705f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jan 2023 10:39:00 -0500 Subject: [PATCH 0682/1418] nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries [ Upstream commit 6c31e4c98853a4ba47355ea151b36a77c42b7734 ] Since v4 files are expected to be long-lived, there's little value in closing them out of the cache when there is conflicting access. Change the comparator to also match the gc value in the key. Change both of the current users of that key to set the gc value in the key to "true". Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 5b5d39ec7b010..c36e3032d4386 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -175,6 +175,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, switch (key->type) { case NFSD_FILE_KEY_INODE: + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + return 1; if (nf->nf_inode != key->inode) return 1; break; @@ -695,6 +697,7 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, + .gc = true, }; struct nfsd_file *nf; @@ -1049,6 +1052,7 @@ nfsd_file_is_cached(struct inode *inode) struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, + .gc = true, }; bool ret = false; -- GitLab From 8a6c19f15766756acfad4f75c9d07fad0479a362 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jan 2023 10:39:01 -0500 Subject: [PATCH 0683/1418] nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator [ Upstream commit d69b8dbfd0866abc5ec84652cc1c10fc3d4d91ef ] test_bit returns bool, so we can just compare the result of that to the key->gc value without the "!!". Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c36e3032d4386..568963b8a4777 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -189,7 +189,7 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, return 1; if (!nfsd_match_cred(nf->nf_cred, key->cred)) return 1; - if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) return 1; if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) return 1; -- GitLab From ee84c44b4a0a320f6c733cd3b33d4d43db7a35fc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 07:15:11 -0500 Subject: [PATCH 0684/1418] nfsd: don't kill nfsd_files because of lease break error [ Upstream commit c6593366c0bf222be9c7561354dfb921c611745e ] An error from break_lease is non-fatal, so we needn't destroy the nfsd_file in that case. Just put the reference like we normally would and return the error. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 568963b8a4777..ab37b85b72077 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1102,7 +1102,7 @@ retry: nf = nfsd_file_alloc(&key, may_flags); if (!nf) { status = nfserr_jukebox; - goto out_status; + goto out; } ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, @@ -1111,13 +1111,11 @@ retry: if (likely(ret == 0)) goto open_file; - nfsd_file_slab_free(&nf->nf_rcu); - nf = NULL; if (ret == -EEXIST) goto retry; trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); status = nfserr_jukebox; - goto out_status; + goto construction_err; wait_for_construction: wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); @@ -1127,29 +1125,25 @@ wait_for_construction: trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); if (!open_retry) { status = nfserr_jukebox; - goto out; + goto construction_err; } open_retry = false; - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); goto retry; } - this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); + if (status != nfs_ok) { + nfsd_file_put(nf); + nf = NULL; + } + out: if (status == nfs_ok) { this_cpu_inc(nfsd_file_acquisitions); nfsd_file_check_write_error(nf); *pnf = nf; - } else { - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); - nf = NULL; } - -out_status: put_cred(key.cred); trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; @@ -1179,6 +1173,13 @@ open_file: if (status != nfs_ok) nfsd_file_unhash(nf); clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (status == nfs_ok) + goto out; + +construction_err: + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); + nf = NULL; goto out; } -- GitLab From 917dadb09e3114b9db2e22b84c7d84de937b3b33 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 07:15:12 -0500 Subject: [PATCH 0685/1418] nfsd: add some comments to nfsd_file_do_acquire [ Upstream commit b680cb9b737331aad271feebbedafb865504e234 ] David Howells mentioned that he found this bit of code confusing, so sprinkle in some comments to clarify. Reported-by: David Howells Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ab37b85b72077..50349449a4e52 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1094,6 +1094,11 @@ retry: rcu_read_unlock(); if (nf) { + /* + * If the nf is on the LRU then it holds an extra reference + * that must be put if it's removed. It had better not be + * the last one however, since we should hold another. + */ if (nfsd_file_lru_remove(nf)) WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; -- GitLab From c01b3f0fef71cdf1bedbd34b30304eaafe2dd97a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jan 2023 12:31:37 -0500 Subject: [PATCH 0686/1418] nfsd: don't take/put an extra reference when putting a file [ Upstream commit b2ff1bd71db2a1b193a6dde0845adcd69cbcf75e ] The last thing that filp_close does is an fput, so don't bother taking and putting the extra reference. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 50349449a4e52..51e2947c21a7d 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -382,10 +382,8 @@ nfsd_file_free(struct nfsd_file *nf) if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { - get_file(nf->nf_file); - filp_close(nf->nf_file, NULL); nfsd_file_check_write_error(nf); - fput(nf->nf_file); + filp_close(nf->nf_file, NULL); } /* -- GitLab From 7cc95476337220ebb5c108077c7294343b4f2e66 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 26 Jan 2023 12:21:16 -0500 Subject: [PATCH 0687/1418] nfsd: update comment over __nfsd_file_cache_purge [ Upstream commit 972cc0e0924598cb293b919d39c848dc038b2c28 ] Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 51e2947c21a7d..9b7082fdd2115 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -907,7 +907,8 @@ out_err: * @net: net-namespace to shut down the cache (may be NULL) * * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, - * then close out everything. Called when an nfsd instance is being shut down. + * then close out everything. Called when an nfsd instance is being shut down, + * and when the exports table is flushed. */ static void __nfsd_file_cache_purge(struct net *net) -- GitLab From f7ae480886873659b0ecff139a35adb173187b74 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 15 Feb 2023 06:53:54 -0500 Subject: [PATCH 0688/1418] nfsd: allow reaping files still under writeback [ Upstream commit dcb779fcd4ed5984ad15991d574943d12a8693d1 ] On most filesystems, there is no reason to delay reaping an nfsd_file just because its underlying inode is still under writeback. nfsd just relies on client activity or the local flusher threads to do writeback. The main exception is NFS, which flushes all of its dirty data on last close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to signal that they do this, and only skip closing files under writeback on such filesystems. Also, remove a redundant NULL file pointer check in nfsd_file_check_writeback, and clean up nfs's export op flag definitions. Signed-off-by: Jeff Layton Acked-by: Anna Schumaker [ cel: adjusted to apply to v6.1.y ] Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfs/export.c | 9 ++++++--- fs/nfsd/filecache.c | 12 +++++++++++- include/linux/exportfs.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 01596f2d0a1ed..9fe9586a51b71 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -156,7 +156,10 @@ const struct export_operations nfs_export_ops = { .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, .fetch_iversion = nfs_fetch_iversion, - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| - EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| - EXPORT_OP_NOATOMIC_ATTR, + .flags = EXPORT_OP_NOWCC | + EXPORT_OP_NOSUBTREECHK | + EXPORT_OP_CLOSE_BEFORE_UNLINK | + EXPORT_OP_REMOTE_FS | + EXPORT_OP_NOATOMIC_ATTR | + EXPORT_OP_FLUSH_ON_CLOSE, }; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9b7082fdd2115..a6fa6e9802772 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -402,13 +402,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) struct file *file = nf->nf_file; struct address_space *mapping; - if (!file || !(file->f_mode & FMODE_WRITE)) + /* File not open for write? */ + if (!(file->f_mode & FMODE_WRITE)) return false; + + /* + * Some filesystems (e.g. NFS) flush all dirty data on close. + * On others, there is no need to wait for writeback. + */ + if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) + return false; + mapping = file->f_mapping; return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } + static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3a..218fc5c54e901 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,6 +221,7 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ +#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ unsigned long flags; }; -- GitLab From 0af5ee518165fb227a5cf30a414d284de93614ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 24 Nov 2022 15:09:04 -0500 Subject: [PATCH 0689/1418] NFSD: Convert filecache to rhltable [ Upstream commit c4c649ab413ba6a785b25f0edbb12f617c87db2a ] While we were converting the nfs4_file hashtable to use the kernel's resizable hashtable data structure, Neil Brown observed that the list variant (rhltable) would be better for managing nfsd_file items as well. The nfsd_file hash table will contain multiple entries for the same inode -- these should be kept together on a list. And, it could be possible for exotic or malicious client behavior to cause the hash table to resize itself on every insertion. A nice simplification is that rhltable_lookup() can return a list that contains only nfsd_file items that match a given inode, which enables us to eliminate specialized hash table helper functions and use the default functions provided by the rhashtable implementation). Since we are now storing nfsd_file items for the same inode on a single list, that effectively reduces the number of hash entries that have to be tracked in the hash table. The mininum bucket count is therefore lowered. Light testing with fstests generic/531 show no regressions. Suggested-by: Neil Brown Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 311 ++++++++++++++++++-------------------------- fs/nfsd/filecache.h | 9 +- 2 files changed, 133 insertions(+), 187 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a6fa6e9802772..2f0b2d964cbb1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -74,70 +74,9 @@ static struct list_lru nfsd_file_lru; static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static struct delayed_work nfsd_filecache_laundrette; -static struct rhashtable nfsd_file_rhash_tbl +static struct rhltable nfsd_file_rhltable ____cacheline_aligned_in_smp; -enum nfsd_file_lookup_type { - NFSD_FILE_KEY_INODE, - NFSD_FILE_KEY_FULL, -}; - -struct nfsd_file_lookup_key { - struct inode *inode; - struct net *net; - const struct cred *cred; - unsigned char need; - bool gc; - enum nfsd_file_lookup_type type; -}; - -/* - * The returned hash value is based solely on the address of an in-code - * inode, a pointer to a slab-allocated object. The entropy in such a - * pointer is concentrated in its middle bits. - */ -static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) -{ - unsigned long ptr = (unsigned long)inode; - u32 k; - - k = ptr >> L1_CACHE_SHIFT; - k &= 0x00ffffff; - return jhash2(&k, 1, seed); -} - -/** - * nfsd_file_key_hashfn - Compute the hash value of a lookup key - * @data: key on which to compute the hash value - * @len: rhash table's key_len parameter (unused) - * @seed: rhash table's random seed of the day - * - * Return value: - * Computed 32-bit hash value - */ -static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nfsd_file_lookup_key *key = data; - - return nfsd_file_inode_hash(key->inode, seed); -} - -/** - * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file - * @data: object on which to compute the hash value - * @len: rhash table's key_len parameter (unused) - * @seed: rhash table's random seed of the day - * - * Return value: - * Computed 32-bit hash value - */ -static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nfsd_file *nf = data; - - return nfsd_file_inode_hash(nf->nf_inode, seed); -} - static bool nfsd_match_cred(const struct cred *c1, const struct cred *c2) { @@ -158,55 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2) return true; } -/** - * nfsd_file_obj_cmpfn - Match a cache item against search criteria - * @arg: search criteria - * @ptr: cache item to check - * - * Return values: - * %0 - Item matches search criteria - * %1 - Item does not match search criteria - */ -static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, - const void *ptr) -{ - const struct nfsd_file_lookup_key *key = arg->key; - const struct nfsd_file *nf = ptr; - - switch (key->type) { - case NFSD_FILE_KEY_INODE: - if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) - return 1; - if (nf->nf_inode != key->inode) - return 1; - break; - case NFSD_FILE_KEY_FULL: - if (nf->nf_inode != key->inode) - return 1; - if (nf->nf_may != key->need) - return 1; - if (nf->nf_net != key->net) - return 1; - if (!nfsd_match_cred(nf->nf_cred, key->cred)) - return 1; - if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) - return 1; - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) - return 1; - break; - } - return 0; -} - static const struct rhashtable_params nfsd_file_rhash_params = { .key_len = sizeof_field(struct nfsd_file, nf_inode), .key_offset = offsetof(struct nfsd_file, nf_inode), - .head_offset = offsetof(struct nfsd_file, nf_rhash), - .hashfn = nfsd_file_key_hashfn, - .obj_hashfn = nfsd_file_obj_hashfn, - .obj_cmpfn = nfsd_file_obj_cmpfn, - /* Reduce resizing churn on light workloads */ - .min_size = 512, /* buckets */ + .head_offset = offsetof(struct nfsd_file, nf_rlist), + + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, .automatic_shrinking = true, }; @@ -309,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) } static struct nfsd_file * -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) +nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, + bool want_gc) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); - if (nf) { - INIT_LIST_HEAD(&nf->nf_lru); - nf->nf_birthtime = ktime_get(); - nf->nf_file = NULL; - nf->nf_cred = get_current_cred(); - nf->nf_net = key->net; - nf->nf_flags = 0; - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - if (key->gc) - __set_bit(NFSD_FILE_GC, &nf->nf_flags); - nf->nf_inode = key->inode; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = key->need; - nf->nf_mark = NULL; - } + if (unlikely(!nf)) + return NULL; + + INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); + nf->nf_file = NULL; + nf->nf_cred = get_current_cred(); + nf->nf_net = net; + nf->nf_flags = want_gc ? + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); + nf->nf_inode = inode; + refcount_set(&nf->nf_ref, 1); + nf->nf_may = need; + nf->nf_mark = NULL; return nf; } @@ -354,8 +254,8 @@ static void nfsd_file_hash_remove(struct nfsd_file *nf) { trace_nfsd_file_unhash(nf); - rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, - nfsd_file_rhash_params); + rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); } static bool @@ -688,8 +588,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) * @inode: inode on which to close out nfsd_files * @dispose: list on which to gather nfsd_files to close out * - * An nfsd_file represents a struct file being held open on behalf of nfsd. An - * open file however can block other activity (such as leases), or cause + * An nfsd_file represents a struct file being held open on behalf of nfsd. + * An open file however can block other activity (such as leases), or cause * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). * * This function is intended to find open nfsd_files when this sort of @@ -702,21 +602,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) static void nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_INODE, - .inode = inode, - .gc = true, - }; + struct rhlist_head *tmp, *list; struct nfsd_file *nf; rcu_read_lock(); - do { - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params); - if (!nf) - break; + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) + continue; nfsd_file_cond_queue(nf, dispose); - } while (1); + } rcu_read_unlock(); } @@ -840,7 +736,7 @@ nfsd_file_cache_init(void) if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; - ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); + ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); if (ret) return ret; @@ -908,7 +804,7 @@ out_err: nfsd_file_mark_slab = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhashtable_destroy(&nfsd_file_rhash_tbl); + rhltable_destroy(&nfsd_file_rhltable); goto out; } @@ -927,7 +823,7 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); - rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -1033,7 +929,7 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhashtable_destroy(&nfsd_file_rhash_tbl); + rhltable_destroy(&nfsd_file_rhltable); for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; @@ -1044,6 +940,35 @@ nfsd_file_cache_shutdown(void) } } +static struct nfsd_file * +nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, + struct inode *inode, unsigned char need, + bool want_gc) +{ + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; + + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (nf->nf_may != need) + continue; + if (nf->nf_net != net) + continue; + if (!nfsd_match_cred(nf->nf_cred, cred)) + continue; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) + continue; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + continue; + + if (!nfsd_file_get(nf)) + continue; + return nf; + } + return NULL; +} + /** * nfsd_file_is_cached - are there any cached open files for this inode? * @inode: inode to check @@ -1058,16 +983,20 @@ nfsd_file_cache_shutdown(void) bool nfsd_file_is_cached(struct inode *inode) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_INODE, - .inode = inode, - .gc = true, - }; + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; bool ret = false; - if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params) != NULL) - ret = true; + rcu_read_lock(); + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { + ret = true; + break; + } + rcu_read_unlock(); + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -1077,14 +1006,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf, bool want_gc) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_FULL, - .need = may_flags & NFSD_FILE_MAY_MASK, - .net = SVC_NET(rqstp), - .gc = want_gc, - }; + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; + struct net *net = SVC_NET(rqstp); + struct nfsd_file *new, *nf; + const struct cred *cred; bool open_retry = true; - struct nfsd_file *nf; + struct inode *inode; __be32 status; int ret; @@ -1092,14 +1019,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; - key.inode = d_inode(fhp->fh_dentry); - key.cred = get_current_cred(); + inode = d_inode(fhp->fh_dentry); + cred = get_current_cred(); retry: rcu_read_lock(); - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params); - nf = nfsd_file_get(nf); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); rcu_read_unlock(); if (nf) { @@ -1113,21 +1038,32 @@ retry: goto wait_for_construction; } - nf = nfsd_file_alloc(&key, may_flags); - if (!nf) { + new = nfsd_file_alloc(net, inode, need, want_gc); + if (!new) { status = nfserr_jukebox; goto out; } - ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, - &key, &nf->nf_rhash, - nfsd_file_rhash_params); + rcu_read_lock(); + spin_lock(&inode->i_lock); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + if (unlikely(nf)) { + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + nfsd_file_slab_free(&new->nf_rcu); + goto wait_for_construction; + } + nf = new; + ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); if (likely(ret == 0)) goto open_file; if (ret == -EEXIST) goto retry; - trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); + trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); status = nfserr_jukebox; goto construction_err; @@ -1136,7 +1072,7 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); + trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); if (!open_retry) { status = nfserr_jukebox; goto construction_err; @@ -1158,13 +1094,13 @@ out: nfsd_file_check_write_error(nf); *pnf = nf; } - put_cred(key.cred); - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + put_cred(cred); + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); if (nf->nf_mark) { if (file) { get_file(file); @@ -1182,7 +1118,7 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status == nfs_ok && key.inode->i_nlink == 0) + if (status != nfs_ok || inode->i_nlink == 0) status = nfserr_jukebox; if (status != nfs_ok) nfsd_file_unhash(nf); @@ -1209,8 +1145,11 @@ construction_err: * seconds after the final nfsd_file_put() in case the caller * wants to re-use it. * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1230,8 +1169,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, * but not garbage-collected. The object is unhashed after the * final nfsd_file_put(). * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1252,8 +1194,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * and @file is non-NULL, use it to instantiate a new nfsd_file instead of * opening a new one. * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1284,7 +1229,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) lru = list_lru_count(&nfsd_file_lru); rcu_read_lock(); - ht = &nfsd_file_rhash_tbl; + ht = &nfsd_file_rhltable.ht; count = atomic_read(&ht->nelems); tbl = rht_dereference_rcu(ht->tbl, ht); buckets = tbl->size; @@ -1300,7 +1245,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) evictions += per_cpu(nfsd_file_evictions, i); } - seq_printf(m, "total entries: %u\n", count); + seq_printf(m, "total inodes: %u\n", count); seq_printf(m, "hash buckets: %u\n", buckets); seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 41516a4263ea5..e54165a3224f0 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,9 +29,8 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { - struct rhash_head nf_rhash; - struct list_head nf_lru; - struct rcu_head nf_rcu; + struct rhlist_head nf_rlist; + void *nf_inode; struct file *nf_file; const struct cred *nf_cred; struct net *nf_net; @@ -40,10 +39,12 @@ struct nfsd_file { #define NFSD_FILE_REFERENCED (2) #define NFSD_FILE_GC (3) unsigned long nf_flags; - struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; unsigned char nf_may; + struct nfsd_file_mark *nf_mark; + struct list_head nf_lru; + struct rcu_head nf_rcu; ktime_t nf_birthtime; }; -- GitLab From 448f1dcd6240e3ab4c78d4052167fd33e7c824c7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Apr 2023 17:31:44 -0400 Subject: [PATCH 0690/1418] nfsd: simplify the delayed disposal list code [ Upstream commit 92e4a6733f922f0fef1d0995f7b2d0eaff86c7ea ] When queueing a dispose list to the appropriate "freeme" lists, it pointlessly queues the objects one at a time to an intermediate list. Remove a few helpers and just open code a list_move to make it more clear and efficient. Better document the resulting functions with kerneldoc comments. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 64 ++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 2f0b2d964cbb1..f40d8f3b35a4c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -402,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose) } } -static void -nfsd_file_list_remove_disposal(struct list_head *dst, - struct nfsd_fcache_disposal *l) -{ - spin_lock(&l->lock); - list_splice_init(&l->freeme, dst); - spin_unlock(&l->lock); -} - -static void -nfsd_file_list_add_disposal(struct list_head *files, struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd_fcache_disposal *l = nn->fcache_disposal; - - spin_lock(&l->lock); - list_splice_tail_init(files, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); -} - -static void -nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, - struct net *net) -{ - struct nfsd_file *nf, *tmp; - - list_for_each_entry_safe(nf, tmp, src, nf_lru) { - if (nf->nf_net == net) - list_move_tail(&nf->nf_lru, dst); - } -} - +/** + * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list + * @dispose: list of nfsd_files to be disposed + * + * Transfers each file to the "freeme" list for its nfsd_net, to eventually + * be disposed of by the per-net garbage collector. + */ static void nfsd_file_dispose_list_delayed(struct list_head *dispose) { - LIST_HEAD(list); - struct nfsd_file *nf; - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); - nfsd_file_list_add_disposal(&list, nf->nf_net); + struct nfsd_file *nf = list_first_entry(dispose, + struct nfsd_file, nf_lru); + struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; + + spin_lock(&l->lock); + list_move_tail(&nf->nf_lru, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); } } @@ -665,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode) * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and destroy any entries that have not been used since - * the last scan. + * Scrape the freeme list for this nfsd_net, and then dispose of them + * all. */ static void nfsd_file_delayed_close(struct work_struct *work) @@ -675,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work) struct nfsd_fcache_disposal *l = container_of(work, struct nfsd_fcache_disposal, work); - nfsd_file_list_remove_disposal(&head, l); + spin_lock(&l->lock); + list_splice_init(&l->freeme, &head); + spin_unlock(&l->lock); + nfsd_file_dispose_list(&head); } -- GitLab From 37085bbd92b3e7c31309c85d1a8273aa0ed213f5 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 19 Apr 2023 10:53:18 -0700 Subject: [PATCH 0691/1418] NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop [ Upstream commit 147abcacee33781e75588869e944ddb07528a897 ] The following request sequence to the same file causes the NFS client and server getting into an infinite loop with COMMIT and NFS4ERR_DELAY: OPEN REMOVE WRITE COMMIT Problem reported by recall11, recall12, recall14, recall20, recall22, recall40, recall42, recall48, recall50 of nfstest suite. This patch restores the handling of race condition in nfsd_file_do_acquire with unlink to that prior of the regression. Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache") Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f40d8f3b35a4c..ee9c923192e08 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1099,8 +1099,6 @@ open_file: * then unhash. */ if (status != nfs_ok || inode->i_nlink == 0) - status = nfserr_jukebox; - if (status != nfs_ok) nfsd_file_unhash(nf); clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); if (status == nfs_ok) -- GitLab From 96e18f236178a5d444a15547413b4e7101da611a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:39 -0400 Subject: [PATCH 0692/1418] NFSD: Add an nfsd4_encode_nfstime4() helper [ Upstream commit 262176798b18b12fd8ab84c94cfece0a6a652476 ] Clean up: de-duplicate some common code. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 597f14a80512f..514f4456cf5c6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, + struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 3); + if (!p) + return nfserr_resource; + + p = xdr_encode_hyper(p, (s64)tv->tv_sec); + *p = cpu_to_be32(tv->tv_nsec); + return nfs_ok; +} + /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -3346,11 +3360,9 @@ out_acl: p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); - *p++ = cpu_to_be32(stat.atime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.atime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3359,25 +3371,19 @@ out_acl: p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); - *p++ = cpu_to_be32(stat.ctime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.ctime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); - *p++ = cpu_to_be32(stat.mtime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.mtime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); - *p++ = cpu_to_be32(stat.btime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; -- GitLab From 806a0a1819babb6defff385c8d74bf82e0604dec Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Fri, 23 Jun 2023 17:09:06 -0400 Subject: [PATCH 0693/1418] nfsd: Fix creation time serialization order In nfsd4_encode_fattr(), TIME_CREATE was being written out after all other times. However, they should be written out in an order that matches the bit flags in bmval1, which in this case are #define FATTR4_WORD1_TIME_ACCESS (1UL << 15) #define FATTR4_WORD1_TIME_CREATE (1UL << 18) #define FATTR4_WORD1_TIME_DELTA (1UL << 19) #define FATTR4_WORD1_TIME_METADATA (1UL << 20) #define FATTR4_WORD1_TIME_MODIFY (1UL << 21) so TIME_CREATE should come second. I noticed this on a FreeBSD NFSv4.2 client, which supports creation times. On this client, file times were weirdly permuted. With this patch applied on the server, times looked normal on the client. Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute") Link: https://unix.stackexchange.com/q/749605/56202 Signed-off-by: Tavian Barnes Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4xdr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 514f4456cf5c6..4ed9fef14adc2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3364,6 +3364,11 @@ out_acl: if (status) goto out; } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; + } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); if (!p) @@ -3380,11 +3385,6 @@ out_acl: if (status) goto out; } - if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - status = nfsd4_encode_nfstime4(xdr, &stat.btime); - if (status) - goto out; - } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; -- GitLab From abd34206f396d3ae50cddbd5aa840b8cd7f68c63 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 18 Dec 2023 08:54:01 +0100 Subject: [PATCH 0694/1418] media: rkisp1: Fix IRQ handling due to shared interrupts [ Upstream commit ffb635bb398fc07cb38f8a7b4a82cbe5f412f08e ] The driver requests the interrupts as IRQF_SHARED, so the interrupt handlers can be called at any time. If such a call happens while the ISP is powered down, the SoC will hang as the driver tries to access the ISP registers. This can be reproduced even without the platform sharing the IRQ line: Enable CONFIG_DEBUG_SHIRQ and unload the driver, and the board will hang. Fix this by adding a new field, 'irqs_enabled', which is used to bail out from the interrupt handler when the ISP is not operational. Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-2-173007628248@ideasonboard.com Signed-off-by: Tomi Valkeinen Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- .../platform/rockchip/rkisp1/rkisp1-capture.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-common.h | 2 ++ .../platform/rockchip/rkisp1/rkisp1-csi.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-dev.c | 22 +++++++++++++++++++ .../platform/rockchip/rkisp1/rkisp1-isp.c | 3 +++ 5 files changed, 33 insertions(+) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c index d4540684ea9af..0bcb9db5ad190 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c @@ -701,6 +701,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx) unsigned int i; u32 status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h index f9ec1c6138947..5776292f914a4 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h @@ -467,6 +467,7 @@ struct rkisp1_debug { * @debug: debug params to be exposed on debugfs * @info: version-specific ISP information * @irqs: IRQ line numbers + * @irqs_enabled: the hardware is enabled and can cause interrupts */ struct rkisp1_device { void __iomem *base_addr; @@ -488,6 +489,7 @@ struct rkisp1_device { struct rkisp1_debug debug; const struct rkisp1_info *info; int irqs[RKISP1_NUM_IRQS]; + bool irqs_enabled; }; /* diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c index e862f515cc6d3..95b6e41c48ec2 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c @@ -211,6 +211,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 val, status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index 41abb18b00acb..7a3b69ba51b97 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -305,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev) { struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); + rkisp1->irqs_enabled = false; + /* Make sure the IRQ handler will see the above */ + mb(); + + /* + * Wait until any running IRQ handler has returned. The IRQ handler + * may get called even after this (as it's a shared interrupt line) + * but the 'irqs_enabled' flag will make the handler return immediately. + */ + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (rkisp1->irqs[il] == -1) + continue; + + /* Skip if the irq line is the same as previous */ + if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il]) + synchronize_irq(rkisp1->irqs[il]); + } + clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks); return pinctrl_pm_select_sleep_state(dev); } @@ -321,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev) if (ret) return ret; + rkisp1->irqs_enabled = true; + /* Make sure the IRQ handler will see the above */ + mb(); + return 0; } diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c index 00dca284c1222..2af5c1a48070b 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c @@ -1023,6 +1023,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 status, isp_err; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS); if (!status) return IRQ_NONE; -- GitLab From d23425dab9975e3abff49fd416856c020dd90dd1 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Fri, 9 Feb 2024 17:11:09 +0000 Subject: [PATCH 0695/1418] perf/arm-cmn: Workaround AmpereOneX errata AC04_MESH_1 (incorrect child count) [ Upstream commit 50572064ec7109b00eef8880e905f55861c8b3de ] AmpereOneX mesh implementation has a bug in HN-P nodes that makes them report incorrect child count. The failing crosspoints report 8 children while they only have two. When the driver tries to access the inexistent child nodes, it believes it has reached an invalid node type and probing fails. The workaround is to ignore those incorrect child nodes and continue normally. Signed-off-by: Ilkka Koskinen [ rm: rewrote simpler generalised version ] Tested-by: Ilkka Koskinen Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ce4b1442135fe03d0de41859b04b268c88c854a3.1707498577.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 47e7c3206939f..899e4ed49905c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2178,6 +2178,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); continue; } + /* + * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus + * child count larger than the number of valid child pointers. + * A child offset of 0 can only occur on CMN-600; otherwise it + * would imply the root node being its own grandchild, which + * we can safely dismiss in general. + */ + if (reg == 0 && cmn->part != PART_CMN600) { + dev_dbg(cmn->dev, "bogus child pointer?\n"); + continue; + } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); -- GitLab From c4b96f7eaba0fbbf948b5b1c13eecd614b3582e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:23 -0800 Subject: [PATCH 0696/1418] selftests: tls: use exact comparison in recv_partial [ Upstream commit 49d821064c44cb5ffdf272905236012ea9ce50e3 ] This exact case was fail for async crypto and we weren't catching it. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5b80fb155d549..d89ee6e1926c7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -926,12 +926,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), strlen(test_str_first)); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), strlen(test_str_second)); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } -- GitLab From aa9e9c776442da3c79a08fa3f68512959ab5e990 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 11 Feb 2024 22:27:35 +0100 Subject: [PATCH 0697/1418] ASoC: rt5645: Make LattePanda board DMI match more precise [ Upstream commit 551539a8606e28cb2a130f8ef3e9834235b456c4 ] The DMI strings used for the LattePanda board DMI quirks are very generic. Using the dmidecode database from https://linux-hardware.org/ shows that the chosen DMI strings also match the following 2 laptops which also have a rt5645 codec: Insignia NS-P11W7100 https://linux-hardware.org/?computer=E092FFF8BA04 Insignia NS-P10W8100 https://linux-hardware.org/?computer=AFB6C0BF7934 All 4 hw revisions of the LattePanda board have "S70CR" in their BIOS version DMI strings: DF-BI-7-S70CR100-* DF-BI-7-S70CR110-* DF-BI-7-S70CR200-* LP-BS-7-S70CR700-* See e.g. https://linux-hardware.org/?computer=D98250A817C0 Add a partial (non exact) DMI match on this string to make the LattePanda board DMI match more precise to avoid false-positive matches. Signed-off-by: Hans de Goede Link: https://msgid.link/r/20240211212736.179605-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5645.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 844d14d4c9a51..aac9140749968 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3802,6 +3802,16 @@ static const struct dmi_system_id dmi_platform_data[] = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"), + /* + * Above strings are too generic, LattePanda BIOS versions for + * all 4 hw revisions are: + * DF-BI-7-S70CR100-* + * DF-BI-7-S70CR110-* + * DF-BI-7-S70CR200-* + * LP-BS-7-S70CR700-* + * Do a partial match for S70CR to avoid false positive matches. + */ + DMI_MATCH(DMI_BIOS_VERSION, "S70CR"), }, .driver_data = (void *)&lattepanda_board_platform_data, }, -- GitLab From 7056108e01779e9188f37c22968e131f188bc6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20T=C5=91k=C3=A9s?= Date: Sat, 10 Feb 2024 21:36:38 +0200 Subject: [PATCH 0698/1418] ASoC: amd: yc: Fix non-functional mic on Lenovo 82UU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f7fe85b229bc30cb5dc95b4e9015a601c9e3a8cd ] Like many other models, the Lenovo 82UU (Yoga Slim 7 Pro 14ARH7) needs a quirk entry for the internal microphone to function. Signed-off-by: Attila Tőkés Link: https://msgid.link/r/20240210193638.144028-1-attitokes@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 28da4e1858d7e..5921af7fd92c5 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -227,6 +227,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UU"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From f49c513f46dc19bf01ffad2aaaf234d7f37f6799 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 17:49:48 +0800 Subject: [PATCH 0699/1418] x86/xen: Add some null pointer checking to smp.c [ Upstream commit 3693bb4465e6e32a204a5b86d3ec7e6b9f7e67c2 ] kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401161119.iof6BQsf-lkp@intel.com/ Suggested-by: Markus Elfring Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240119094948.275390-1-chentao@kylinos.cn Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/smp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4b0d6fff88de5..1fb9a1644d944 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu) char *resched_name, *callfunc_name, *debug_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); + if (!resched_name) + goto fail_mem; per_cpu(xen_resched_irq, cpu).name = resched_name; rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, @@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu) per_cpu(xen_resched_irq, cpu).irq = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); + if (!callfunc_name) + goto fail_mem; per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, cpu, @@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu) if (!xen_fifo_events) { debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); + if (!debug_name) + goto fail_mem; + per_cpu(xen_debug_irq, cpu).name = debug_name; rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu) } callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); + if (!callfunc_name) + goto fail_mem; + per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, cpu, @@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu) return 0; + fail_mem: + rc = -ENOMEM; fail: xen_smp_intr_free(cpu); return rc; -- GitLab From 9a07188311af2d978e84cb91ff7e54eda01f2876 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:27 +0000 Subject: [PATCH 0700/1418] MIPS: Clear Cause.BD in instruction_pointer_set [ Upstream commit 9d6e21ddf20293b3880ae55b9d14de91c5891c59 ] Clear Cause.BD after we use instruction_pointer_set to override EPC. This can prevent exception_epc check against instruction code at new return address. It won't be considered as "in delay slot" after epc being overridden anyway. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/ptrace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index daf3cf244ea97..b3e4dd6be7e20 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -60,6 +60,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->cp0_epc = val; + regs->cp0_cause &= ~CAUSEF_BD; } /* Query offset/name of register from its name/offset */ -- GitLab From d1614e1fd6c363549311c45a38c321f1c5be5774 Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Sun, 11 Feb 2024 19:04:29 +0000 Subject: [PATCH 0701/1418] HID: multitouch: Add required quirk for Synaptics 0xcddc device [ Upstream commit 1741a8269e1c51fa08d4bfdf34667387a6eb10ec ] Add support for the pointing stick (Accupoint) and 2 mouse buttons. Present on some Toshiba/dynabook Portege X30 and X40 laptops. It should close https://bugzilla.kernel.org/show_bug.cgi?id=205817 Signed-off-by: Manuel Fombuena Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 5ec1f174127a3..3816fd06bc953 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2153,6 +2153,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xcddc) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, -- GitLab From 6ce8cc5e6251229d5239d141d0e5102a0ffa1c7f Mon Sep 17 00:00:00 2001 From: Andrew Ballance Date: Tue, 13 Feb 2024 19:23:05 -0600 Subject: [PATCH 0702/1418] gen_compile_commands: fix invalid escape sequence warning [ Upstream commit dae4a0171e25884787da32823b3081b4c2acebb2 ] With python 3.12, '\#' results in this warning SyntaxWarning: invalid escape sequence '\#' Signed-off-by: Andrew Ballance Reviewed-by: Justin Stitt Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/clang-tools/gen_compile_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index d800b2c0af977..4f414ab706bd8 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path): # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the # kernel version). The compile_commands.json file is not interepreted # by Make, so this code replaces the escaped version with '#'. - prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#') + prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#') # Use os.path.abspath() to normalize the path resolving '.' and '..' . abs_path = os.path.abspath(os.path.join(root_directory, file_path)) -- GitLab From 0e5b11ff7354021b77c1d4f8b5976c2f491d11c8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 18:24:38 +0000 Subject: [PATCH 0703/1418] arm64/sve: Lower the maximum allocation for the SVE ptrace regset [ Upstream commit 2813926261e436d33bc74486b51cce60b76edf78 ] Doug Anderson observed that ChromeOS crashes are being reported which include failing allocations of order 7 during core dumps due to ptrace allocating storage for regsets: chrome: page allocation failure: order:7, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=urgent,mems_allowed=0 ... regset_get_alloc+0x1c/0x28 elf_core_dump+0x3d8/0xd8c do_coredump+0xeb8/0x1378 with further investigation showing that this is: [ 66.957385] DOUG: Allocating 279584 bytes which is the maximum size of the SVE regset. As Doug observes it is not entirely surprising that such a large allocation of contiguous memory might fail on a long running system. The SVE regset is currently sized to hold SVE registers with a VQ of SVE_VQ_MAX which is 512, substantially more than the architectural maximum of 16 which we might see even in a system emulating the limits of the architecture. Since we don't expose the size we tell the regset core externally let's define ARCH_SVE_VQ_MAX with the actual architectural maximum and use that for the regset, we'll still overallocate most of the time but much less so which will be helpful even if the core is fixed to not require contiguous allocations. Specify ARCH_SVE_VQ_MAX in terms of the maximum value that can be written into ZCR_ELx.LEN (where this is set in the hardware). For consistency update the maximum SME vector length to be specified in the same style while we are at it. We could also teach the ptrace core about runtime discoverable regset sizes but that would be a more invasive change and this is being observed in practical systems. Reported-by: Doug Anderson Signed-off-by: Mark Brown Tested-by: Douglas Anderson Link: https://lore.kernel.org/r/20240213-arm64-sve-ptrace-regset-size-v2-1-c7600ca74b9b@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/fpsimd.h | 12 ++++++------ arch/arm64/kernel/ptrace.c | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index da18413712c04..930b0e6c94622 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -36,13 +36,13 @@ * When we defined the maximum SVE vector length we defined the ABI so * that the maximum vector length included all the reserved for future * expansion bits in ZCR rather than those just currently defined by - * the architecture. While SME follows a similar pattern the fact that - * it includes a square matrix means that any allocations that attempt - * to cover the maximum potential vector length (such as happen with - * the regset used for ptrace) end up being extremely large. Define - * the much lower actual limit for use in such situations. + * the architecture. Using this length to allocate worst size buffers + * results in excessively large allocations, and this effect is even + * more pronounced for SME due to ZA. Define more suitable VLs for + * these situations. */ -#define SME_VQ_MAX 16 +#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1) +#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1) struct task_struct; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e1f6366b7ccdf..d02dd2be17b3b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1450,7 +1450,8 @@ static const struct user_regset aarch64_regsets[] = { #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ .core_note_type = NT_ARM_SVE, - .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX, + SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, -- GitLab From 6d6aa6c0bfd653890372360c52f0ca8a566c8b3d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Jan 2024 16:04:14 +0100 Subject: [PATCH 0704/1418] soc: microchip: Fix POLARFIRE_SOC_SYS_CTRL input prompt [ Upstream commit 6dd9a236042e305d7b69ee92db7347bf5943e7d3 ] The symbol's prompt should be a one-line description, instead of just duplicating the symbol name. Signed-off-by: Geert Uytterhoeven Signed-off-by: Conor Dooley Signed-off-by: Sasha Levin --- drivers/soc/microchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig index eb656b33156ba..f19e74d342aa2 100644 --- a/drivers/soc/microchip/Kconfig +++ b/drivers/soc/microchip/Kconfig @@ -1,5 +1,5 @@ config POLARFIRE_SOC_SYS_CTRL - tristate "POLARFIRE_SOC_SYS_CTRL" + tristate "Microchip PolarFire SoC (MPFS) system controller support" depends on POLARFIRE_SOC_MAILBOX help This driver adds support for the PolarFire SoC (MPFS) system controller. -- GitLab From cad82f1671e41094acd3b9a60cd27d67a3c64a21 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Jan 2024 11:29:11 +0200 Subject: [PATCH 0705/1418] RDMA/mlx5: Fix fortify source warning while accessing Eth segment [ Upstream commit 4d5e86a56615cc387d21c629f9af8fb0e958d350 ] ------------[ cut here ]------------ memcpy: detected field-spanning write (size 56) of single field "eseg->inline_hdr.start" at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 (size 2) WARNING: CPU: 0 PID: 293779 at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Modules linked in: 8021q garp mrp stp llc rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) ib_core(OE) mlx5_core(OE) pci_hyperv_intf mlxdevm(OE) mlx_compat(OE) tls mlxfw(OE) psample nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink mst_pciconf(OE) knem(OE) vfio_pci vfio_pci_core vfio_iommu_type1 vfio iommufd irqbypass cuse nfsv3 nfs fscache netfs xfrm_user xfrm_algo ipmi_devintf ipmi_msghandler binfmt_misc crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 snd_pcsp aesni_intel crypto_simd cryptd snd_pcm snd_timer joydev snd soundcore input_leds serio_raw evbug nfsd auth_rpcgss nfs_acl lockd grace sch_fq_codel sunrpc drm efi_pstore ip_tables x_tables autofs4 psmouse virtio_net net_failover failover floppy [last unloaded: mlx_compat(OE)] CPU: 0 PID: 293779 Comm: ssh Tainted: G OE 6.2.0-32-generic #32~22.04.1-Ubuntu Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Code: 0c 01 00 a8 01 75 25 48 8b 75 a0 b9 02 00 00 00 48 c7 c2 10 5b fd c0 48 c7 c7 80 5b fd c0 c6 05 57 0c 03 00 01 e8 95 4d 93 da <0f> 0b 44 8b 4d b0 4c 8b 45 c8 48 8b 4d c0 e9 49 fb ff ff 41 0f b7 RSP: 0018:ffffb5b48478b570 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb5b48478b628 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffb5b48478b5e8 R13: ffff963a3c609b5e R14: ffff9639c3fbd800 R15: ffffb5b480475a80 FS: 00007fc03b444c80(0000) GS:ffff963a3dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556f46bdf000 CR3: 0000000006ac6003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0x72/0x90 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? __warn+0x8d/0x160 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? report_bug+0x1bb/0x1d0 ? handle_bug+0x46/0x90 ? exc_invalid_op+0x19/0x80 ? asm_exc_invalid_op+0x1b/0x20 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] mlx5_ib_post_send_nodrain+0xb/0x20 [mlx5_ib] ipoib_send+0x2ec/0x770 [ib_ipoib] ipoib_start_xmit+0x5a0/0x770 [ib_ipoib] dev_hard_start_xmit+0x8e/0x1e0 ? validate_xmit_skb_list+0x4d/0x80 sch_direct_xmit+0x116/0x3a0 __dev_xmit_skb+0x1fd/0x580 __dev_queue_xmit+0x284/0x6b0 ? _raw_spin_unlock_irq+0xe/0x50 ? __flush_work.isra.0+0x20d/0x370 ? push_pseudo_header+0x17/0x40 [ib_ipoib] neigh_connected_output+0xcd/0x110 ip_finish_output2+0x179/0x480 ? __smp_call_single_queue+0x61/0xa0 __ip_finish_output+0xc3/0x190 ip_finish_output+0x2e/0xf0 ip_output+0x78/0x110 ? __pfx_ip_finish_output+0x10/0x10 ip_local_out+0x64/0x70 __ip_queue_xmit+0x18a/0x460 ip_queue_xmit+0x15/0x30 __tcp_transmit_skb+0x914/0x9c0 tcp_write_xmit+0x334/0x8d0 tcp_push_one+0x3c/0x60 tcp_sendmsg_locked+0x2e1/0xac0 tcp_sendmsg+0x2d/0x50 inet_sendmsg+0x43/0x90 sock_sendmsg+0x68/0x80 sock_write_iter+0x93/0x100 vfs_write+0x326/0x3c0 ksys_write+0xbd/0xf0 ? do_syscall_64+0x69/0x90 __x64_sys_write+0x19/0x30 do_syscall_64+0x59/0x90 ? do_user_addr_fault+0x1d0/0x640 ? exit_to_user_mode_prepare+0x3b/0xd0 ? irqentry_exit_to_user_mode+0x9/0x20 ? irqentry_exit+0x43/0x50 ? exc_page_fault+0x92/0x1b0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc03ad14a37 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdf8697fe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000008024 RCX: 00007fc03ad14a37 RDX: 0000000000008024 RSI: 0000556f46bd8270 RDI: 0000000000000003 RBP: 0000556f46bb1800 R08: 0000000000007fe3 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000556f46bc66b0 R14: 000000000000000a R15: 0000556f46bb2f50 ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/r/8228ad34bd1a25047586270f7b1fb4ddcd046282.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/wr.c | 2 +- include/linux/mlx5/qp.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 855f3f4fefadd..737db67a9ce1d 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); - memcpy(eseg->inline_hdr.start, pdata, copysz); + memcpy(eseg->inline_hdr.data, pdata, copysz); stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - sizeof(eseg->inline_hdr.start) + copysz, 16); *size += stride / 16; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4657d5c54abef..ca0eee571ad7b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; -- GitLab From 6d4c7bd6dc35427b76b16bd69e5552af53c11d5b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Jan 2024 11:29:13 +0200 Subject: [PATCH 0706/1418] RDMA/mlx5: Relax DEVX access upon modify commands [ Upstream commit be551ee1574280ef8afbf7c271212ac3e38933ef ] Relax DEVX access upon modify commands to be UVERBS_ACCESS_READ. The kernel doesn't need to protect what firmware protects, or what causes no damage to anyone but the user. As firmware needs to protect itself from parallel access to the same object, don't block parallel modify/query commands on the same object in the kernel side. This change will allow user space application to run parallel updates to different entries in the same bulk object. Tested-by: Tamar Mashiah Signed-off-by: Yishai Hadas Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/7407d5ed35dc427c1097699e12b49c01e1073406.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f8e2baed27a5c..7013ce20549bd 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2951,7 +2951,7 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, UVERBS_IDR_ANY_OBJECT, - UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, -- GitLab From d35d346b5df607bb612fd587561ec8b7917be6e3 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 13 Feb 2024 19:45:40 +0000 Subject: [PATCH 0707/1418] riscv: dts: sifive: add missing #interrupt-cells to pmic [ Upstream commit ce6b6d1513965f500a05f3facf223fa01fd74920 ] At W=2 dtc complains: hifive-unmatched-a00.dts:120.10-238.4: Warning (interrupt_provider): /soc/i2c@10030000/pmic@58: Missing '#interrupt-cells' in interrupt provider Add the missing property. Reviewed-by: Samuel Holland Signed-off-by: Conor Dooley Signed-off-by: Sasha Levin --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 07387f9c135ca..72b87b08ab444 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -123,6 +123,7 @@ interrupt-parent = <&gpio>; interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; -- GitLab From 46c8615de5bf7ee2076750bd3024e5912d7ee4eb Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:33 +0800 Subject: [PATCH 0708/1418] x86/mm: Move is_vsyscall_vaddr() into asm/vsyscall.h [ Upstream commit ee0e39a63b78849f8abbef268b13e4838569f646 ] Move is_vsyscall_vaddr() into asm/vsyscall.h to make it available for copy_from_kernel_nofault_allowed() in arch/x86/mm/maccess.c. Reviewed-by: Sohil Mehta Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20240202103935.3154011-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/x86/include/asm/vsyscall.h | 10 ++++++++++ arch/x86/mm/fault.c | 9 --------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index ab60a71a8dcb9..472f0263dbc61 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); @@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, } #endif +/* + * The (legacy) vsyscall page is the long page in the kernel portion + * of the address space that has user-accessible permissions. + */ +static inline bool is_vsyscall_vaddr(unsigned long vaddr) +{ + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); +} + #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbbad73192a1..f20636510eb1e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -818,15 +818,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) -- GitLab From f175de546a3eb77614d94d4c02550181c0a8493e Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:34 +0800 Subject: [PATCH 0709/1418] x86/mm: Disallow vsyscall page read for copy_from_kernel_nofault() [ Upstream commit 32019c659ecfe1d92e3bf9fcdfbb11a7c70acd58 ] When trying to use copy_from_kernel_nofault() to read vsyscall page through a bpf program, the following oops was reported: BUG: unable to handle page fault for address: ffffffffff600000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 3231067 P4D 3231067 PUD 3233067 PMD 3235067 PTE 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 20390 Comm: test_progs ...... 6.7.0+ #58 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:copy_from_kernel_nofault+0x6f/0x110 ...... Call Trace: ? copy_from_kernel_nofault+0x6f/0x110 bpf_probe_read_kernel+0x1d/0x50 bpf_prog_2061065e56845f08_do_probe_read+0x51/0x8d trace_call_bpf+0xc5/0x1c0 perf_call_bpf_enter.isra.0+0x69/0xb0 perf_syscall_enter+0x13e/0x200 syscall_trace_enter+0x188/0x1c0 do_syscall_64+0xb5/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 ...... ---[ end trace 0000000000000000 ]--- The oops is triggered when: 1) A bpf program uses bpf_probe_read_kernel() to read from the vsyscall page and invokes copy_from_kernel_nofault() which in turn calls __get_user_asm(). 2) Because the vsyscall page address is not readable from kernel space, a page fault exception is triggered accordingly. 3) handle_page_fault() considers the vsyscall page address as a user space address instead of a kernel space address. This results in the fix-up setup by bpf not being applied and a page_fault_oops() is invoked due to SMAP. Considering handle_page_fault() has already considered the vsyscall page address as a userspace address, fix the problem by disallowing vsyscall page read for copy_from_kernel_nofault(). Originally-by: Thomas Gleixner Reported-by: syzbot+72aa0161922eba61b50e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAG48ez06TZft=ATH1qh2c5mpS5BT8UakwNkzi6nvK5_djC-4Nw@mail.gmail.com Reported-by: xingwei lee Closes: https://lore.kernel.org/bpf/CABOYnLynjBoFZOf3Z4BhaZkc5hx_kHfsjiW+UWLoB=w33LvScw@mail.gmail.com Signed-off-by: Hou Tao Reviewed-by: Sohil Mehta Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240202103935.3154011-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/x86/mm/maccess.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 6993f026adec9..42115ac079cfe 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -3,6 +3,8 @@ #include #include +#include + #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { @@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) return false; + /* + * Reading from the vsyscall page may cause an unhandled fault in + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is + * usually considered as a user space address. + */ + if (is_vsyscall_vaddr(vaddr)) + return false; + /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early -- GitLab From 05896c8ff4ed16ce622169c072046f8b86d8cde7 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 14 Feb 2024 17:32:40 +0100 Subject: [PATCH 0710/1418] net/iucv: fix the allocation size of iucv_path_table array [ Upstream commit b4ea9b6a18ebf7f9f3a7a60f82e925186978cfcf ] iucv_path_table is a dynamically allocated array of pointers to struct iucv_path items. Yet, its size is calculated as if it was an array of struct iucv_path items. Signed-off-by: Alexander Gordeev Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/iucv/iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fc3fddeb6f36d..f66b5f74cd83a 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID"; static LIST_HEAD(iucv_handler_list); /* - * iucv_path_table: an array of iucv_path structures. + * iucv_path_table: array of pointers to iucv_path structures. */ static struct iucv_path **iucv_path_table; static unsigned long iucv_max_pathid; @@ -544,7 +544,7 @@ static int iucv_enable(void) cpus_read_lock(); rc = -ENOMEM; - alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + alloc_size = iucv_max_pathid * sizeof(*iucv_path_table); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); if (!iucv_path_table) goto out; -- GitLab From 4492f21263186a84237f3167aed7b0cb455869c5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Sun, 11 Feb 2024 23:43:14 +0100 Subject: [PATCH 0711/1418] parisc/ftrace: add missing CONFIG_DYNAMIC_FTRACE check [ Upstream commit 250f5402e636a5cec9e0e95df252c3d54307210f ] Fixes a bug revealed by -Wmissing-prototypes when CONFIG_FUNCTION_GRAPH_TRACER is enabled but not CONFIG_DYNAMIC_FTRACE: arch/parisc/kernel/ftrace.c:82:5: error: no previous prototype for 'ftrace_enable_ftrace_graph_caller' [-Werror=missing-prototypes] 82 | int ftrace_enable_ftrace_graph_caller(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/parisc/kernel/ftrace.c:88:5: error: no previous prototype for 'ftrace_disable_ftrace_graph_caller' [-Werror=missing-prototypes] 88 | int ftrace_disable_ftrace_graph_caller(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Max Kellermann Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 4d392e4ed3584..ac7253891d5ed 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -78,7 +78,7 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER) int ftrace_enable_ftrace_graph_caller(void) { static_key_enable(&ftrace_graph_enable.key); -- GitLab From 6fb80b3e75b5ab85ca7eeb1e5cba33b5f1d2d0db Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Fri, 16 Feb 2024 15:04:17 -0600 Subject: [PATCH 0712/1418] block: sed-opal: handle empty atoms when parsing response [ Upstream commit 5429c8de56f6b2bd8f537df3a1e04e67b9c04282 ] The SED Opal response parsing function response_parse() does not handle the case of an empty atom in the response. This causes the entry count to be too high and the response fails to be parsed. Recognizing, but ignoring, empty atoms allows response handling to succeed. Signed-off-by: Greg Joyce Link: https://lore.kernel.org/r/20240216210417.3526064-2-gjoyce@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/opal_proto.h | 1 + block/sed-opal.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/block/opal_proto.h b/block/opal_proto.h index 7152aa1f1a49e..7f306b08a0fe7 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -71,6 +71,7 @@ enum opal_response_token { #define SHORT_ATOM_BYTE 0xBF #define MEDIUM_ATOM_BYTE 0xDF #define LONG_ATOM_BYTE 0xE3 +#define EMPTY_ATOM_BYTE 0xFF #define OPAL_INVAL_PARAM 12 #define OPAL_MANUFACTURED_INACTIVE 0x08 diff --git a/block/sed-opal.c b/block/sed-opal.c index 9bdb833e5817d..25e4ce452c1d3 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -935,16 +935,20 @@ static int response_parse(const u8 *buf, size_t length, token_length = response_parse_medium(iter, pos); else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ token_length = response_parse_long(iter, pos); + else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */ + token_length = 1; else /* TOKEN */ token_length = response_parse_token(iter, pos); if (token_length < 0) return token_length; + if (pos[0] != EMPTY_ATOM_BYTE) + num_entries++; + pos += token_length; total -= token_length; iter++; - num_entries++; } resp->num = num_entries; -- GitLab From b22b54f247f601a0896092c779d4c6bb80038475 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 20 Feb 2024 19:11:51 +0100 Subject: [PATCH 0713/1418] dm-verity, dm-crypt: align "struct bvec_iter" correctly [ Upstream commit 787f1b2800464aa277236a66eb3c279535edd460 ] "struct bvec_iter" is defined with the __packed attribute, so it is aligned on a single byte. On X86 (and on other architectures that support unaligned addresses in hardware), "struct bvec_iter" is accessed using the 8-byte and 4-byte memory instructions, however these instructions are less efficient if they operate on unaligned addresses. (on RISC machines that don't have unaligned access in hardware, GCC generates byte-by-byte accesses that are very inefficient - see [1]) This commit reorders the entries in "struct dm_verity_io" and "struct convert_context", so that "struct bvec_iter" is aligned on 8 bytes. [1] https://lore.kernel.org/all/ZcLuWUNRZadJr0tQ@fedora/T/ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-crypt.c | 4 ++-- drivers/md/dm-verity.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3e215aa85b99a..e8c534b5870ac 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -52,11 +52,11 @@ struct convert_context { struct completion restart; struct bio *bio_in; - struct bio *bio_out; struct bvec_iter iter_in; + struct bio *bio_out; struct bvec_iter iter_out; - u64 cc_sector; atomic_t cc_pending; + u64 cc_sector; union { struct skcipher_request *req; struct aead_request *req_aead; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 4620a98c99561..db93a91169d5e 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -80,12 +80,12 @@ struct dm_verity_io { /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; + struct bvec_iter iter; + sector_t block; unsigned int n_blocks; bool in_tasklet; - struct bvec_iter iter; - struct work_struct work; char *recheck_buffer; -- GitLab From 53983d354bd7abf0605a85e28e1325a82d35d455 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:27 -0600 Subject: [PATCH 0714/1418] arm64: dts: Fix dtc interrupt_provider warnings [ Upstream commit 91adecf911e5df78ea3e8f866e69db2c33416a5c ] The dtc interrupt_provider warning is off by default. Fix all the warnings so it can be enabled. Signed-off-by: Rob Herring Reviewed-By: AngeloGioacchino Del Regno # Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Acked-by: Florian Fainelli #Broadcom Acked-by: Chanho Min Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-3-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amazon/alpine-v2.dtsi | 1 - arch/arm64/boot/dts/amazon/alpine-v3.dtsi | 1 - arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 1 + arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 1 + arch/arm64/boot/dts/lg/lg1312.dtsi | 1 - arch/arm64/boot/dts/lg/lg1313.dtsi | 1 - arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 1 - arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 1 + arch/arm64/boot/dts/renesas/ulcb-kf.dtsi | 4 ++++ 9 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index 4eb2cd14e00b0..9b6da84deae7a 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -145,7 +145,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <160>; al,msi-num-spis = <160>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 73a352ea8fd5c..b30014d4dc29c 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -351,7 +351,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <336>; al,msi-num-spis = <959>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index fda97c47f4e97..d5778417455c0 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -584,6 +584,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 8f8c25e51194d..473d7d0ddf369 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -442,6 +442,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; gpio-ranges = <&pinmux 0 0 16>, <&pinmux 16 71 2>, diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 78ae73d0cf365..98ff17b14b2a5 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -124,7 +124,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 2173316573bee..8e9410d8f46c0 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -124,7 +124,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index a06a0a889c43f..73d8803b54d8b 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -133,7 +133,6 @@ odmi: odmi@300000 { compatible = "marvell,odmi-controller"; - interrupt-controller; msi-controller; marvell,odmi-frames = <4>; reg = <0x300000 0x4000>, diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 5117b2e7985af..998c2e78168a6 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -111,6 +111,7 @@ compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; + #interrupt-cells = <1>; interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi index 588b14b66b6fb..f37abfc13fe59 100644 --- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi @@ -251,6 +251,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; @@ -311,6 +312,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; }; @@ -331,6 +333,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio7>; interrupts = <3 IRQ_TYPE_EDGE_FALLING>; }; @@ -341,6 +344,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio5>; interrupts = <9 IRQ_TYPE_EDGE_FALLING>; }; -- GitLab From 995e91c9556c8fc6028b474145a36e947d1eb6b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Feb 2024 19:41:23 +0000 Subject: [PATCH 0715/1418] btrfs: fix data races when accessing the reserved amount of block reserves [ Upstream commit e06cc89475eddc1f3a7a4d471524256152c68166 ] At space_info.c we have several places where we access the ->reserved field of a block reserve without taking the block reserve's spinlock first, which makes KCSAN warn about a data race since that field is always updated while holding the spinlock. The reports from KCSAN are like the following: [117.193526] BUG: KCSAN: data-race in btrfs_block_rsv_release [btrfs] / need_preemptive_reclaim [btrfs] [117.195148] read to 0x000000017f587190 of 8 bytes by task 6303 on cpu 3: [117.195172] need_preemptive_reclaim+0x222/0x2f0 [btrfs] [117.195992] __reserve_bytes+0xbb0/0xdc8 [btrfs] [117.196807] btrfs_reserve_metadata_bytes+0x4c/0x120 [btrfs] [117.197620] btrfs_block_rsv_add+0x78/0xa8 [btrfs] [117.198434] btrfs_delayed_update_inode+0x154/0x368 [btrfs] [117.199300] btrfs_update_inode+0x108/0x1c8 [btrfs] [117.200122] btrfs_dirty_inode+0xb4/0x140 [btrfs] [117.200937] btrfs_update_time+0x8c/0xb0 [btrfs] [117.201754] touch_atime+0x16c/0x1e0 [117.201789] filemap_read+0x674/0x728 [117.201823] btrfs_file_read_iter+0xf8/0x410 [btrfs] [117.202653] vfs_read+0x2b6/0x498 [117.203454] ksys_read+0xa2/0x150 [117.203473] __s390x_sys_read+0x68/0x88 [117.203495] do_syscall+0x1c6/0x210 [117.203517] __do_syscall+0xc8/0xf0 [117.203539] system_call+0x70/0x98 [117.203579] write to 0x000000017f587190 of 8 bytes by task 11 on cpu 0: [117.203604] btrfs_block_rsv_release+0x2e8/0x578 [btrfs] [117.204432] btrfs_delayed_inode_release_metadata+0x7c/0x1d0 [btrfs] [117.205259] __btrfs_update_delayed_inode+0x37c/0x5e0 [btrfs] [117.206093] btrfs_async_run_delayed_root+0x356/0x498 [btrfs] [117.206917] btrfs_work_helper+0x160/0x7a0 [btrfs] [117.207738] process_one_work+0x3b6/0x838 [117.207768] worker_thread+0x75e/0xb10 [117.207797] kthread+0x21a/0x230 [117.207830] __ret_from_fork+0x6c/0xb8 [117.207861] ret_from_fork+0xa/0x30 So add a helper to get the reserved amount of a block reserve while holding the lock. The value may be not be up to date anymore when used by need_preemptive_reclaim() and btrfs_preempt_reclaim_metadata_space(), but that's ok since the worst it can do is cause more reclaim work do be done sooner rather than later. Reading the field while holding the lock instead of using the data_race() annotation is used in order to prevent load tearing. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-rsv.h | 16 ++++++++++++++++ fs/btrfs/space-info.c | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index 578c3497a455c..cda79d3e0c263 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -101,4 +101,20 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) return data_race(rsv->full); } +/* + * Get the reserved mount of a block reserve in a context where getting a stale + * value is acceptable, instead of accessing it directly and trigger data race + * warning from KCSAN. + */ +static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->reserved; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 2635fb4bffa06..8b75f436a9a3c 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -847,7 +847,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info) { - u64 global_rsv_size = fs_info->global_block_rsv.reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); u64 ordered, delalloc; u64 total = writable_total_bytes(fs_info, space_info); u64 thresh; @@ -948,8 +948,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1; delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes); if (ordered >= delalloc) - used += fs_info->delayed_refs_rsv.reserved + - fs_info->delayed_block_rsv.reserved; + used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) + + btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv); else used += space_info->bytes_may_use - global_rsv_size; @@ -1164,7 +1164,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) enum btrfs_flush_state flush; u64 delalloc_size = 0; u64 to_reclaim, block_rsv_size; - u64 global_rsv_size = global_rsv->reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); loops++; @@ -1176,9 +1176,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * assume it's tied up in delalloc reservations. */ block_rsv_size = global_rsv_size + - delayed_block_rsv->reserved + - delayed_refs_rsv->reserved + - trans_rsv->reserved; + btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv) + + btrfs_block_rsv_reserved(trans_rsv); if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; @@ -1198,16 +1198,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) to_reclaim = delalloc_size; flush = FLUSH_DELALLOC; } else if (space_info->bytes_pinned > - (delayed_block_rsv->reserved + - delayed_refs_rsv->reserved)) { + (btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv))) { to_reclaim = space_info->bytes_pinned; flush = COMMIT_TRANS; - } else if (delayed_block_rsv->reserved > - delayed_refs_rsv->reserved) { - to_reclaim = delayed_block_rsv->reserved; + } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > + btrfs_block_rsv_reserved(delayed_refs_rsv)) { + to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv); flush = FLUSH_DELAYED_ITEMS_NR; } else { - to_reclaim = delayed_refs_rsv->reserved; + to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv); flush = FLUSH_DELAYED_REFS_NR; } -- GitLab From ab1be3f1aa7799f99155488c28eacaef65eb68fb Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Feb 2024 20:10:07 +0000 Subject: [PATCH 0716/1418] btrfs: fix data race at btrfs_use_block_rsv() when accessing block reserve [ Upstream commit c7bb26b847e5b97814f522686068c5628e2b3646 ] At btrfs_use_block_rsv() we read the size of a block reserve without locking its spinlock, which makes KCSAN complain because the size of a block reserve is always updated while holding its spinlock. The report from KCSAN is the following: [653.313148] BUG: KCSAN: data-race in btrfs_update_delayed_refs_rsv [btrfs] / btrfs_use_block_rsv [btrfs] [653.314755] read to 0x000000017f5871b8 of 8 bytes by task 7519 on cpu 0: [653.314779] btrfs_use_block_rsv+0xe4/0x2f8 [btrfs] [653.315606] btrfs_alloc_tree_block+0xdc/0x998 [btrfs] [653.316421] btrfs_force_cow_block+0x220/0xe38 [btrfs] [653.317242] btrfs_cow_block+0x1ac/0x568 [btrfs] [653.318060] btrfs_search_slot+0xda2/0x19b8 [btrfs] [653.318879] btrfs_del_csums+0x1dc/0x798 [btrfs] [653.319702] __btrfs_free_extent.isra.0+0xc24/0x2028 [btrfs] [653.320538] __btrfs_run_delayed_refs+0xd3c/0x2390 [btrfs] [653.321340] btrfs_run_delayed_refs+0xae/0x290 [btrfs] [653.322140] flush_space+0x5e4/0x718 [btrfs] [653.322958] btrfs_preempt_reclaim_metadata_space+0x102/0x2f8 [btrfs] [653.323781] process_one_work+0x3b6/0x838 [653.323800] worker_thread+0x75e/0xb10 [653.323817] kthread+0x21a/0x230 [653.323836] __ret_from_fork+0x6c/0xb8 [653.323855] ret_from_fork+0xa/0x30 [653.323887] write to 0x000000017f5871b8 of 8 bytes by task 576 on cpu 3: [653.323906] btrfs_update_delayed_refs_rsv+0x1a4/0x250 [btrfs] [653.324699] btrfs_add_delayed_data_ref+0x468/0x6d8 [btrfs] [653.325494] btrfs_free_extent+0x76/0x120 [btrfs] [653.326280] __btrfs_mod_ref+0x6a8/0x6b8 [btrfs] [653.327064] btrfs_dec_ref+0x50/0x70 [btrfs] [653.327849] walk_up_proc+0x236/0xa50 [btrfs] [653.328633] walk_up_tree+0x21c/0x448 [btrfs] [653.329418] btrfs_drop_snapshot+0x802/0x1328 [btrfs] [653.330205] btrfs_clean_one_deleted_snapshot+0x184/0x238 [btrfs] [653.330995] cleaner_kthread+0x2b0/0x2f0 [btrfs] [653.331781] kthread+0x21a/0x230 [653.331800] __ret_from_fork+0x6c/0xb8 [653.331818] ret_from_fork+0xa/0x30 So add a helper to get the size of a block reserve while holding the lock. Reading the field while holding the lock instead of using the data_race() annotation is used in order to prevent load tearing. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-rsv.c | 2 +- fs/btrfs/block-rsv.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 507b44d18572d..4cbf386166209 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -512,7 +512,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); - if (unlikely(block_rsv->size == 0)) + if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index cda79d3e0c263..df87c4949d065 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -117,4 +117,20 @@ static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) return ret; } +/* + * Get the size of a block reserve in a context where getting a stale value is + * acceptable, instead of accessing it directly and trigger data race warning + * from KCSAN. + */ +static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->size; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ -- GitLab From 80656ee2baa7d7b86834c6de2ce8ca19e403edfc Mon Sep 17 00:00:00 2001 From: Andre Werner Date: Mon, 19 Feb 2024 06:33:32 +0100 Subject: [PATCH 0717/1418] net: smsc95xx: add support for SYS TEC USB-SPEmodule1 [ Upstream commit 45532b21dc2a692444b6ad5f71c253cca53e8103 ] This patch adds support for the SYS TEC USB-SPEmodule1 10Base-T1L ethernet device to the existing smsc95xx driver by adding the new USB VID/PID pair. Signed-off-by: Andre Werner Link: https://lore.kernel.org/r/20240219053413.4732-1-andre.werner@systec-electronic.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index a530f20ee2575..2fa46baa589e5 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2104,6 +2104,11 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0424, 0x9E08), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */ + USB_DEVICE(0x0878, 0x1400), + .driver_info = (unsigned long)&smsc95xx_info, + }, { /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ USB_DEVICE(0x184F, 0x0051), -- GitLab From 50fbd3a7210f932ae6b35a31a005217f5b26677c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 21 Feb 2024 15:05:35 +0100 Subject: [PATCH 0718/1418] wifi: mac80211: only call drv_sta_rc_update for uploaded stations [ Upstream commit 413dafc8170fcb925fb17af8842f06af305f8e0b ] When a station has not been uploaded yet, receiving SMPS or channel width notification action frames can lead to rate_control_rate_update calling drv_sta_rc_update with uninitialized driver private data. Fix this by adding a missing check for sta->uploaded. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240221140535.16102-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d5ea5f5bcf3a0..9d33fd2377c88 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local, rcu_read_unlock(); } - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + if (sta->uploaded) + drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) -- GitLab From 008985fd35a7aa284177f69856206fcf0d0bd129 Mon Sep 17 00:00:00 2001 From: Johnny Hsieh Date: Mon, 26 Feb 2024 21:44:50 +0800 Subject: [PATCH 0719/1418] ASoC: amd: yc: Add Lenovo ThinkBook 21J0 into DMI quirk table [ Upstream commit 50ee641643dd0f46702e9a99354398196e1734c2 ] This patch adds Lenovo 21J0 (ThinkBook 16 G5+ ARP) to the DMI quirks table to enable internal microphone array. Cc: linux-sound@vger.kernel.org Signed-off-by: Johnny Hsieh Link: https://msgid.link/r/TYSPR04MB8429D62DFDB6727866ECF1DEC55A2@TYSPR04MB8429.apcprd04.prod.outlook.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 5921af7fd92c5..0568e64d10150 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J0"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From b0e50fa810e805fb48bbabfc38096194eb5591f7 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Wed, 21 Feb 2024 12:47:24 +0530 Subject: [PATCH 0720/1418] scsi: mpt3sas: Prevent sending diag_reset when the controller is ready [ Upstream commit ee0017c3ed8a8abfa4d40e42f908fb38c31e7515 ] If the driver detects that the controller is not ready before sending the first IOC facts command, it will wait for a maximum of 10 seconds for it to become ready. However, even if the controller becomes ready within 10 seconds, the driver will still issue a diagnostic reset. Modify the driver to avoid sending a diag reset if the controller becomes ready within the 10-second wait time. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240221071724.14986-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 809be43f440dc..8e6ac08e553bb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7398,7 +7398,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout) return -EFAULT; } - issue_diag_reset: + return 0; + +issue_diag_reset: rc = _base_diag_reset(ioc); return rc; } -- GitLab From 5e61a994b2280f0dfc1ff2465d763460fd944bc1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 23 Feb 2024 14:54:34 +0800 Subject: [PATCH 0721/1418] ALSA: hda/realtek - ALC285 reduce pop noise from Headphone port [ Upstream commit b34bf65838f7c6e785f62681605a538b73c2808c ] It had pop noise from Headphone port when system reboot state. If NID 58h Index 0x0 to fill default value, it will reduce pop noise. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/7493e207919a4fb3a0599324fd010e3e@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 75bd7b2fa4ee6..ede3f8b273d79 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3681,6 +3681,7 @@ static void alc285_hp_init(struct hda_codec *codec) int i, val; int coef38, coef0d, coef36; + alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */ alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */ coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */ coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */ -- GitLab From 05c7c2d198a0143ef660f98eac2fc5086e644551 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 22 Feb 2024 20:56:59 +0800 Subject: [PATCH 0722/1418] drm/amdgpu: Enable gpu reset for S3 abort cases on Raven series [ Upstream commit c671ec01311b4744b377f98b0b4c6d033fe569b3 ] Currently, GPU resets can now be performed successfully on the Raven series. While GPU reset is required for the S3 suspend abort case. So now can enable gpu reset for S3 abort cases on the Raven series. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc15.c | 45 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 489c89465c78b..c373a2a3248eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,11 +584,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1285,24 +1308,6 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } -static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) -{ - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. - */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) - return true; - - return false; -} - static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; -- GitLab From 0e3732d1df3550ac9fc0fbf4d1acbd2901514605 Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Wed, 28 Feb 2024 15:39:14 +0800 Subject: [PATCH 0723/1418] ASoC: amd: yc: Fix non-functional mic on Lenovo 21J2 [ Upstream commit ed00a6945dc32462c2d3744a3518d2316da66fcc ] Like many other models, the Lenovo 21J2 (ThinkBook 16 G5+ APO) needs a quirk entry for the internal microphone to function. Signed-off-by: Jiawei Wang Link: https://msgid.link/r/20240228073914.232204-2-me@jwang.link Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 0568e64d10150..e0f406b6646ba 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J2"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From 567c0411dc3b424fc7bd1e6109726d7ba32d4f73 Mon Sep 17 00:00:00 2001 From: Yuxuan Hu <20373622@buaa.edu.cn> Date: Wed, 3 Jan 2024 17:10:43 +0800 Subject: [PATCH 0724/1418] Bluetooth: rfcomm: Fix null-ptr-deref in rfcomm_check_security [ Upstream commit 2535b848fa0f42ddff3e5255cf5e742c9b77bb26 ] During our fuzz testing of the connection and disconnection process at the RFCOMM layer, we discovered this bug. By comparing the packets from a normal connection and disconnection process with the testcase that triggered a KASAN report. We analyzed the cause of this bug as follows: 1. In the packets captured during a normal connection, the host sends a `Read Encryption Key Size` type of `HCI_CMD` packet (Command Opcode: 0x1408) to the controller to inquire the length of encryption key.After receiving this packet, the controller immediately replies with a Command Completepacket (Event Code: 0x0e) to return the Encryption Key Size. 2. In our fuzz test case, the timing of the controller's response to this packet was delayed to an unexpected point: after the RFCOMM and L2CAP layers had disconnected but before the HCI layer had disconnected. 3. After receiving the Encryption Key Size Response at the time described in point 2, the host still called the rfcomm_check_security function. However, by this time `struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;` had already been released, and when the function executed `return hci_conn_security(conn->hcon, d->sec_level, auth_type, d->out);`, specifically when accessing `conn->hcon`, a null-ptr-deref error occurred. To fix this bug, check if `sk->sk_state` is BT_CLOSED before calling rfcomm_recv_frame in rfcomm_process_rx. Signed-off-by: Yuxuan Hu <20373622@buaa.edu.cn> Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 8d6fce9005bdd..4f54c7df3a94f 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1937,7 +1937,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) { + if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) { s = rfcomm_recv_frame(s, skb); if (!s) break; -- GitLab From 2f3ce8fcbf52c4f686a3762e20982ce068dace54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 22 Jan 2024 17:59:55 +0100 Subject: [PATCH 0725/1418] Bluetooth: mgmt: Fix limited discoverable off timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0bd1fb586235224048c726922db048d1bce6354a ] LIMITED_DISCOVERABLE flag is not reset from Class of Device and advertisement on limited discoverable timeout. This prevents to pass PTS test GAP/DISC/LIMM/BV-02-C Calling set_discoverable_sync as when the limited discovery is set correctly update the Class of Device and advertisement. Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6d631a2e60166..ab63f807e3c80 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1039,6 +1039,8 @@ static void rpa_expired(struct work_struct *work) hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } +static int set_discoverable_sync(struct hci_dev *hdev, void *data); + static void discov_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1057,7 +1059,7 @@ static void discov_off(struct work_struct *work) hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; - hci_update_discoverable(hdev); + hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL); mgmt_new_settings(hdev); -- GitLab From 72dbf660e01a24a4205df2cffffb4e2e6ea5321e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 29 Feb 2024 22:17:37 +0900 Subject: [PATCH 0726/1418] firewire: core: use long bus reset on gap count error [ Upstream commit d0b06dc48fb15902d7da09c5c0861e7f042a9381 ] When resetting the bus after a gap count error, use a long rather than short bus reset. IEEE 1394-1995 uses only long bus resets. IEEE 1394a adds the option of short bus resets. When video or audio transmission is in progress and a device is hot-plugged elsewhere on the bus, the resulting bus reset can cause video frame drops or audio dropouts. Short bus resets reduce or eliminate this problem. Accordingly, short bus resets are almost always preferred. However, on a mixed 1394/1394a bus, a short bus reset can trigger an immediate additional bus reset. This double bus reset can be interpreted differently by different nodes on the bus, resulting in an inconsistent gap count after the bus reset. An inconsistent gap count will cause another bus reset, leading to a neverending bus reset loop. This only happens for some bus topologies, not for all mixed 1394/1394a buses. By instead sending a long bus reset after a gap count inconsistency, we avoid the doubled bus reset, restoring the bus to normal operation. Signed-off-by: Adam Goldman Link: https://sourceforge.net/p/linux1394/mailman/message/58741624/ Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/core-card.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 8aaa7fcb2630d..401a77e3b5fa8 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -500,7 +500,19 @@ static void bm_work(struct work_struct *work) fw_notice(card, "phy config: new root=%x, gap_count=%d\n", new_root_id, gap_count); fw_send_phy_config(card, new_root_id, generation, gap_count); - reset_bus(card, true); + /* + * Where possible, use a short bus reset to minimize + * disruption to isochronous transfers. But in the event + * of a gap count inconsistency, use a long bus reset. + * + * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus + * may set different gap counts after a bus reset. On a mixed + * 1394/1394a bus, a short bus reset can get doubled. Some + * nodes may treat the double reset as one bus reset and others + * may treat it as two, causing a gap count inconsistency + * again. Using a long bus reset prevents this. + */ + reset_bus(card, card->gap_count != 0); /* Will allocate broadcast channel after the reset. */ goto out; } -- GitLab From 9a4fb2bdee2f72b1f33b5bf96b4a51e5692878b8 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 2 Feb 2024 11:08:12 +0100 Subject: [PATCH 0727/1418] arm64: tegra: Set the correct PHY mode for MGBE [ Upstream commit 4c892121d43bc2b45896ca207b54f39a8fa6b852 ] The PHY is configured in 10GBASE-R, so make sure to reflect that in DT. Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts index f094011be9ed9..8099dc04ed2e1 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts @@ -2024,7 +2024,7 @@ status = "okay"; phy-handle = <&mgbe0_phy>; - phy-mode = "usxgmii"; + phy-mode = "10gbase-r"; mdio { #address-cells = <1>; -- GitLab From 374709a7e541ca3e910f93e6ff24fe819923fc63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alban=20Boy=C3=A9?= Date: Wed, 28 Feb 2024 19:28:41 +0000 Subject: [PATCH 0728/1418] ASoC: Intel: bytcr_rt5640: Add an extra entry for the Chuwi Vi8 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f8b0127aca8c60826e7354e504a12d4a46b1c3bb ] The bios version can differ depending if it is a dual-boot variant of the tablet. Therefore another DMI match is required. Signed-off-by: Alban Boyé Reviewed-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240228192807.15130-1-alban.boye@protonmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 797d0a48d6066..094445036c20f 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -685,6 +685,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* Chuwi Vi8 dual-boot (CWI506) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "i86"), + /* The above are too generic, also match BIOS info */ + DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Chuwi Vi10 (CWI505) */ .matches = { -- GitLab From d575eb8747d6f748805187a49a614a152021728c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 5 Mar 2024 11:10:42 +0100 Subject: [PATCH 0729/1418] Input: gpio_keys_polled - suppress deferred probe error for gpio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 963465a33141d0d52338e77f80fe543d2c9dc053 ] On a PC Engines APU our admins are faced with: $ dmesg | grep -c "gpio-keys-polled gpio-keys-polled: unable to claim gpio 0, err=-517" 261 Such a message always appears when e.g. a new USB device is plugged in. Suppress this message which considerably clutters the kernel log for EPROBE_DEFER (i.e. -517). Signed-off-by: Uwe Kleine-König Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240305101042.10953-2-u.kleine-koenig@pengutronix.de Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/gpio_keys_polled.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index c3937d2fc7446..a0f9978c68f55 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -319,12 +319,10 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) error = devm_gpio_request_one(dev, button->gpio, flags, button->desc ? : DRV_NAME); - if (error) { - dev_err(dev, - "unable to claim gpio %u, err=%d\n", - button->gpio, error); - return error; - } + if (error) + return dev_err_probe(dev, error, + "unable to claim gpio %u\n", + button->gpio); bdata->gpiod = gpio_to_desc(button->gpio); if (!bdata->gpiod) { -- GitLab From f11b50b8baff81f46d21477c88e5af5912bdb847 Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 6 Mar 2024 16:14:35 +0000 Subject: [PATCH 0730/1418] ASoC: wm8962: Enable oscillator if selecting WM8962_FLL_OSC [ Upstream commit 03c7874106ca5032a312626b927b1c35f07b1f35 ] Signed-off-by: Stuart Henderson Link: https://msgid.link/r/20240306161439.1385643-1-stuarth@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b901e4c65e8a5..35d8cd4fc71a7 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2914,8 +2914,12 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s switch (fll_id) { case WM8962_FLL_MCLK: case WM8962_FLL_BCLK: + fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT; + break; case WM8962_FLL_OSC: fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT; + snd_soc_component_update_bits(component, WM8962_PLL2, + WM8962_OSC_ENA, WM8962_OSC_ENA); break; case WM8962_FLL_INT: snd_soc_component_update_bits(component, WM8962_FLL_CONTROL_1, -- GitLab From 0a5d59ed95f1c4bbef208863c14783b1f42b7a8e Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 6 Mar 2024 16:14:36 +0000 Subject: [PATCH 0731/1418] ASoC: wm8962: Enable both SPKOUTR_ENA and SPKOUTL_ENA in mono mode [ Upstream commit 6fa849e4d78b880e878138bf238e4fd2bac3c4fa ] Signed-off-by: Stuart Henderson Link: https://msgid.link/r/20240306161439.1385643-2-stuarth@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 35d8cd4fc71a7..806b69c9b2e36 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2229,6 +2229,9 @@ SND_SOC_DAPM_PGA_E("HPOUT", SND_SOC_NOPM, 0, 0, NULL, 0, hp_event, SND_SOC_DAPM_OUTPUT("HPOUTL"), SND_SOC_DAPM_OUTPUT("HPOUTR"), + +SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0), +SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), }; static const struct snd_soc_dapm_widget wm8962_dapm_spk_mono_widgets[] = { @@ -2236,7 +2239,6 @@ SND_SOC_DAPM_MIXER("Speaker Mixer", WM8962_MIXER_ENABLES, 1, 0, spkmixl, ARRAY_SIZE(spkmixl)), SND_SOC_DAPM_MUX_E("Speaker PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux, out_pga_event, SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA("Speaker Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), SND_SOC_DAPM_OUTPUT("SPKOUT"), }; @@ -2251,9 +2253,6 @@ SND_SOC_DAPM_MUX_E("SPKOUTL PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux, SND_SOC_DAPM_MUX_E("SPKOUTR PGA", WM8962_PWR_MGMT_2, 3, 0, &spkoutr_mux, out_pga_event, SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), -SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0), - SND_SOC_DAPM_OUTPUT("SPKOUTL"), SND_SOC_DAPM_OUTPUT("SPKOUTR"), }; @@ -2366,12 +2365,18 @@ static const struct snd_soc_dapm_route wm8962_spk_mono_intercon[] = { { "Speaker PGA", "Mixer", "Speaker Mixer" }, { "Speaker PGA", "DAC", "DACL" }, - { "Speaker Output", NULL, "Speaker PGA" }, - { "Speaker Output", NULL, "SYSCLK" }, - { "Speaker Output", NULL, "TOCLK" }, - { "Speaker Output", NULL, "TEMP_SPK" }, + { "SPKOUTL Output", NULL, "Speaker PGA" }, + { "SPKOUTL Output", NULL, "SYSCLK" }, + { "SPKOUTL Output", NULL, "TOCLK" }, + { "SPKOUTL Output", NULL, "TEMP_SPK" }, + + { "SPKOUTR Output", NULL, "Speaker PGA" }, + { "SPKOUTR Output", NULL, "SYSCLK" }, + { "SPKOUTR Output", NULL, "TOCLK" }, + { "SPKOUTR Output", NULL, "TEMP_SPK" }, - { "SPKOUT", NULL, "Speaker Output" }, + { "SPKOUT", NULL, "SPKOUTL Output" }, + { "SPKOUT", NULL, "SPKOUTR Output" }, }; static const struct snd_soc_dapm_route wm8962_spk_stereo_intercon[] = { -- GitLab From 807f991396648830274b644d53c9cef67e519983 Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 6 Mar 2024 16:14:39 +0000 Subject: [PATCH 0732/1418] ASoC: wm8962: Fix up incorrect error message in wm8962_set_fll [ Upstream commit 96e202f8c52ac49452f83317cf3b34cd1ad81e18 ] Use source instead of ret, which seems to be unrelated and will always be zero. Signed-off-by: Stuart Henderson Link: https://msgid.link/r/20240306161439.1385643-5-stuarth@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 806b69c9b2e36..d215e58c4a7b3 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2933,7 +2933,7 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s WM8962_FLL_FRC_NCO, WM8962_FLL_FRC_NCO); break; default: - dev_err(component->dev, "Unknown FLL source %d\n", ret); + dev_err(component->dev, "Unknown FLL source %d\n", source); return -EINVAL; } -- GitLab From e6450d5e46a737a008b4885aa223486113bf0ad6 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 19 Jan 2024 07:39:06 -0800 Subject: [PATCH 0733/1418] do_sys_name_to_handle(): use kzalloc() to fix kernel-infoleak [ Upstream commit 3948abaa4e2be938ccdfc289385a27342fb13d43 ] syzbot identified a kernel information leak vulnerability in do_sys_name_to_handle() and issued the following report [1]. [1] "BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x100 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x100 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] do_sys_name_to_handle fs/fhandle.c:73 [inline] __do_sys_name_to_handle_at fs/fhandle.c:112 [inline] __se_sys_name_to_handle_at+0x949/0xb10 fs/fhandle.c:94 __x64_sys_name_to_handle_at+0xe4/0x140 fs/fhandle.c:94 ... Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] __kmem_cache_alloc_node+0x5c9/0x970 mm/slub.c:3517 __do_kmalloc_node mm/slab_common.c:1006 [inline] __kmalloc+0x121/0x3c0 mm/slab_common.c:1020 kmalloc include/linux/slab.h:604 [inline] do_sys_name_to_handle fs/fhandle.c:39 [inline] __do_sys_name_to_handle_at fs/fhandle.c:112 [inline] __se_sys_name_to_handle_at+0x441/0xb10 fs/fhandle.c:94 __x64_sys_name_to_handle_at+0xe4/0x140 fs/fhandle.c:94 ... Bytes 18-19 of 20 are uninitialized Memory access of size 20 starts at ffff888128a46380 Data copied to user address 0000000020000240" Per Chuck Lever's suggestion, use kzalloc() instead of kmalloc() to solve the problem. Fixes: 990d6c2d7aee ("vfs: Add name to file handle conversion support") Suggested-by: Chuck Lever III Reported-and-tested-by: Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240119153906.4367-1-n.zhandarovich@fintech.ru Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/fhandle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index f2bc27d1975e1..a8c25557c8c12 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -37,7 +37,7 @@ static long do_sys_name_to_handle(const struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, GFP_KERNEL); if (!handle) return -ENOMEM; -- GitLab From 7533ed7668bc7296a5ad84e61cdf907aa8eb8fec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 2 Feb 2024 12:39:20 -0800 Subject: [PATCH 0734/1418] fs: Fix rw_hint validation [ Upstream commit ec16b147a55bfa14e858234eb7b1a7c8e7cd5021 ] Reject values that are valid rw_hints after truncation but not before truncation by passing an untruncated value to rw_hint_valid(). Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Cc: Jeff Layton Cc: Chuck Lever Cc: Jens Axboe Cc: Stephen Rothwell Fixes: 5657cb0797c4 ("fs/fcntl: use copy_to/from_user() for u64 types") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240202203926.2478590-2-bvanassche@acm.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/fcntl.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 146c9ab0cd4b7..0964e5dbf0cac 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -267,7 +267,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) } #endif -static bool rw_hint_valid(enum rw_hint hint) +static bool rw_hint_valid(u64 hint) { switch (hint) { case RWH_WRITE_LIFE_NOT_SET: @@ -287,19 +287,17 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd, { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; - enum rw_hint hint; - u64 h; + u64 hint; switch (cmd) { case F_GET_RW_HINT: - h = inode->i_write_hint; - if (copy_to_user(argp, &h, sizeof(*argp))) + hint = inode->i_write_hint; + if (copy_to_user(argp, &hint, sizeof(*argp))) return -EFAULT; return 0; case F_SET_RW_HINT: - if (copy_from_user(&h, argp, sizeof(h))) + if (copy_from_user(&hint, argp, sizeof(hint))) return -EFAULT; - hint = (enum rw_hint) h; if (!rw_hint_valid(hint)) return -EINVAL; -- GitLab From ba0e1cc43e09167f3e516b426d5648d2903b0afe Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 5 Apr 2023 16:20:12 +0200 Subject: [PATCH 0735/1418] s390/dasd: add autoquiesce feature [ Upstream commit 1cee2975bbabd89df1097c354867192106b058ea ] Add the internal logic to check for autoquiesce triggers and handle them. Quiesce and resume are functions that tell Linux to stop/resume issuing I/Os to a specific DASD. The DASD driver allows a manual quiesce/resume via ioctl. Autoquiesce will define an amount of triggers that will lead to an automatic quiesce if a certain event occurs. There is no automatic resume. All events will be reported via DASD Extended Error Reporting (EER) if configured. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Reviewed-by: Halil Pasic Link: https://lore.kernel.org/r/20230405142017.2446986-3-sth@linux.ibm.com Signed-off-by: Jens Axboe Stable-dep-of: c3116e62ddef ("s390/dasd: fix double module refcount decrement") Signed-off-by: Sasha Levin --- arch/s390/include/uapi/asm/dasd.h | 2 ++ drivers/s390/block/dasd.c | 60 ++++++++++++++++++++++--------- drivers/s390/block/dasd_eer.c | 1 + drivers/s390/block/dasd_int.h | 2 ++ 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 93d1ccd3304c7..9c49c3d67cd56 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -78,6 +78,7 @@ typedef struct dasd_information2_t { * 0x040: give access to raw eckd data * 0x080: enable discard support * 0x100: enable autodisable for IFCC errors (default) + * 0x200: enable requeue of all requests on autoquiesce */ #define DASD_FEATURE_READONLY 0x001 #define DASD_FEATURE_USEDIAG 0x002 @@ -88,6 +89,7 @@ typedef struct dasd_information2_t { #define DASD_FEATURE_USERAW 0x040 #define DASD_FEATURE_DISCARD 0x080 #define DASD_FEATURE_PATH_AUTODISABLE 0x100 +#define DASD_FEATURE_REQUEUEQUIESCE 0x200 #define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE #define DASD_PARTN_BITS 2 diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f207de4a87a0f..2f6976671496f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -73,7 +73,8 @@ static void dasd_profile_init(struct dasd_profile *, struct dentry *); static void dasd_profile_exit(struct dasd_profile *); static void dasd_hosts_init(struct dentry *, struct dasd_device *); static void dasd_hosts_exit(struct dasd_device *); - +static int dasd_handle_autoquiesce(struct dasd_device *, struct dasd_ccw_req *, + unsigned int); /* * SECTION: Operations on the device structure. */ @@ -2327,7 +2328,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) /* Non-temporary stop condition will trigger fail fast */ if (device->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(device))) { + !dasd_eer_enabled(device) && device->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; continue; @@ -2803,20 +2804,18 @@ restart: dasd_log_sense(cqr, &cqr->irb); } - /* First of all call extended error reporting. */ - if (dasd_eer_enabled(base) && - cqr->status == DASD_CQR_FAILED) { - dasd_eer_write(base, cqr, DASD_EER_FATALERROR); - - /* restart request */ + /* + * First call extended error reporting and check for autoquiesce + */ + spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); + if (cqr->status == DASD_CQR_FAILED && + dasd_handle_autoquiesce(base, cqr, DASD_EER_FATALERROR)) { cqr->status = DASD_CQR_FILLED; cqr->retries = 255; - spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); - spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), - flags); + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); goto restart; } + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); /* Process finished ERP request. */ if (cqr->refers) { @@ -2858,7 +2857,7 @@ static void __dasd_block_start_head(struct dasd_block *block) /* Non-temporary stop condition will trigger fail fast */ if (block->base->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(block->base))) { + !dasd_eer_enabled(block->base) && block->base->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; dasd_schedule_block_bh(block); @@ -3682,8 +3681,8 @@ int dasd_generic_last_path_gone(struct dasd_device *device) dev_warn(&device->cdev->dev, "No operational channel path is left " "for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last path gone"); - /* First of all call extended error reporting. */ - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); if (device->state < DASD_STATE_BASIC) return 0; @@ -3815,7 +3814,8 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) "No verified channel paths remain for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last verified path gone"); - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); } @@ -3837,7 +3837,8 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path); void dasd_generic_space_exhaust(struct dasd_device *device, struct dasd_ccw_req *cqr) { - dasd_eer_write(device, NULL, DASD_EER_NOSPC); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOSPC); if (device->state < DASD_STATE_BASIC) return; @@ -3931,6 +3932,31 @@ void dasd_schedule_requeue(struct dasd_device *device) } EXPORT_SYMBOL(dasd_schedule_requeue); +static int dasd_handle_autoquiesce(struct dasd_device *device, + struct dasd_ccw_req *cqr, + unsigned int reason) +{ + /* in any case write eer message with reason */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, cqr, reason); + + if (!test_bit(reason, &device->aq_mask)) + return 0; + + /* notify eer about autoquiesce */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); + + pr_info("%s: The DASD has been put in the quiesce state\n", + dev_name(&device->cdev->dev)); + dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); + + if (device->features & DASD_FEATURE_REQUEUEQUIESCE) + dasd_schedule_requeue(device); + + return 1; +} + static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, int rdc_buffer_size, int magic) diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index d4d31cd11d261..d16c699b9ac6d 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -387,6 +387,7 @@ void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr, break; case DASD_EER_NOPATH: case DASD_EER_NOSPC: + case DASD_EER_AUTOQUIESCE: dasd_eer_write_standard_trigger(device, NULL, id); break; case DASD_EER_STATECHANGE: diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index f50932518f83a..00bcd177264ac 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -464,6 +464,7 @@ extern struct dasd_discipline *dasd_diag_discipline_pointer; #define DASD_EER_STATECHANGE 3 #define DASD_EER_PPRCSUSPEND 4 #define DASD_EER_NOSPC 5 +#define DASD_EER_AUTOQUIESCE 31 /* DASD path handling */ @@ -641,6 +642,7 @@ struct dasd_device { struct dasd_format_entry format_entry; struct kset *paths_info; struct dasd_copy_relation *copy; + unsigned long aq_mask; }; struct dasd_block { -- GitLab From 977bb962a116fc8f2d5b633ba355cbab4f48fbf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 8 Feb 2024 17:42:48 +0100 Subject: [PATCH 0736/1418] s390/dasd: Use dev_*() for device log messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79ae56fc475869d636071f66d9e4ef2a3819eee6 ] All log messages in dasd.c use the printk variants of pr_*(). They all add the name of the affected device manually to the log message. This can be simplified by using the dev_*() variants of printk, which include the device information and make a separate call to dev_name() unnecessary. The KMSG_COMPONENT and the pr_fmt() definition can be dropped. Note that this removes the "dasd: " prefix from the one pr_info() call in dasd_init(). However, the log message already provides all relevant information. Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-10-sth@linux.ibm.com Signed-off-by: Jens Axboe Stable-dep-of: c3116e62ddef ("s390/dasd: fix double module refcount decrement") Signed-off-by: Sasha Levin --- drivers/s390/block/dasd.c | 50 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 2f6976671496f..029bb9e15ad90 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -8,9 +8,6 @@ * Copyright IBM Corp. 1999, 2009 */ -#define KMSG_COMPONENT "dasd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - #include #include #include @@ -3390,8 +3387,7 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie) ret = ccw_device_set_online(cdev); if (ret) - pr_warn("%s: Setting the DASD online failed with rc=%d\n", - dev_name(&cdev->dev), ret); + dev_warn(&cdev->dev, "Setting the DASD online failed with rc=%d\n", ret); } /* @@ -3478,8 +3474,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, { struct dasd_discipline *discipline; struct dasd_device *device; + struct device *dev; int rc; + dev = &cdev->dev; + /* first online clears initial online feature flag */ dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); device = dasd_create_device(cdev); @@ -3492,11 +3491,10 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Try to load the required module. */ rc = request_module(DASD_DIAG_MOD); if (rc) { - pr_warn("%s Setting the DASD online failed " - "because the required module %s " - "could not be loaded (rc=%d)\n", - dev_name(&cdev->dev), DASD_DIAG_MOD, - rc); + dev_warn(dev, "Setting the DASD online failed " + "because the required module %s " + "could not be loaded (rc=%d)\n", + DASD_DIAG_MOD, rc); dasd_delete_device(device); return -ENODEV; } @@ -3504,8 +3502,7 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Module init could have failed, so check again here after * request_module(). */ if (!dasd_diag_discipline_pointer) { - pr_warn("%s Setting the DASD online failed because of missing DIAG discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of missing DIAG discipline\n"); dasd_delete_device(device); return -ENODEV; } @@ -3526,8 +3523,8 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* check_device will allocate block device if necessary */ rc = discipline->check_device(device); if (rc) { - pr_warn("%s Setting the DASD online with discipline %s failed with rc=%i\n", - dev_name(&cdev->dev), discipline->name, rc); + dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", + discipline->name, rc); module_put(discipline->owner); module_put(base_discipline->owner); dasd_delete_device(device); @@ -3536,16 +3533,15 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_set_target_state(device, DASD_STATE_ONLINE); if (device->state <= DASD_STATE_KNOWN) { - pr_warn("%s Setting the DASD online failed because of a missing discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of a missing discipline\n"); rc = -ENODEV; dasd_set_target_state(device, DASD_STATE_NEW); if (device->block) dasd_free_block(device->block); dasd_delete_device(device); - } else - pr_debug("dasd_generic device %s found\n", - dev_name(&cdev->dev)); + } else { + dev_dbg(dev, "dasd_generic device found\n"); + } wait_event(dasd_init_waitq, _wait_for_device(device)); @@ -3556,10 +3552,13 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online); int dasd_generic_set_offline(struct ccw_device *cdev) { + int max_count, open_count, rc; struct dasd_device *device; struct dasd_block *block; - int max_count, open_count, rc; unsigned long flags; + struct device *dev; + + dev = &cdev->dev; rc = 0; spin_lock_irqsave(get_ccwdev_lock(cdev), flags); @@ -3580,11 +3579,10 @@ int dasd_generic_set_offline(struct ccw_device *cdev) open_count = atomic_read(&device->block->open_count); if (open_count > max_count) { if (open_count > 0) - pr_warn("%s: The DASD cannot be set offline with open count %i\n", - dev_name(&cdev->dev), open_count); + dev_warn(dev, "The DASD cannot be set offline with open count %i\n", + open_count); else - pr_warn("%s: The DASD cannot be set offline while it is in use\n", - dev_name(&cdev->dev)); + dev_warn(dev, "The DASD cannot be set offline while it is in use\n"); rc = -EBUSY; goto out_err; } @@ -3947,8 +3945,8 @@ static int dasd_handle_autoquiesce(struct dasd_device *device, if (dasd_eer_enabled(device)) dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); - pr_info("%s: The DASD has been put in the quiesce state\n", - dev_name(&device->cdev->dev)); + dev_info(&device->cdev->dev, + "The DASD has been put in the quiesce state\n"); dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); if (device->features & DASD_FEATURE_REQUEUEQUIESCE) -- GitLab From ad999aa18103fa038787b6a8a55020abcf34df1a Mon Sep 17 00:00:00 2001 From: Miroslav Franc Date: Fri, 9 Feb 2024 13:45:22 +0100 Subject: [PATCH 0737/1418] s390/dasd: fix double module refcount decrement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c3116e62ddeff79cae342147753ce596f01fcf06 ] Once the discipline is associated with the device, deleting the device takes care of decrementing the module's refcount. Doing it manually on this error path causes refcount to artificially decrease on each error while it should just stay the same. Fixes: c020d722b110 ("s390/dasd: fix panic during offline processing") Signed-off-by: Miroslav Franc Signed-off-by: Jan Höppner Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240209124522.3697827-3-sth@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/s390/block/dasd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 029bb9e15ad90..341d65acd715d 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3512,12 +3512,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_delete_device(device); return -EINVAL; } + device->base_discipline = base_discipline; if (!try_module_get(discipline->owner)) { - module_put(base_discipline->owner); dasd_delete_device(device); return -EINVAL; } - device->base_discipline = base_discipline; device->discipline = discipline; /* check_device will allocate block device if necessary */ @@ -3525,8 +3524,6 @@ int dasd_generic_set_online(struct ccw_device *cdev, if (rc) { dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", discipline->name, rc); - module_put(discipline->owner); - module_put(base_discipline->owner); dasd_delete_device(device); return rc; } -- GitLab From f8b89a36721f0ae80e2d3321368607f9e00c74ad Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:15 +0100 Subject: [PATCH 0738/1418] rcu/exp: Fix RCU expedited parallel grace period kworker allocation failure recovery [ Upstream commit a636c5e6f8fc34be520277e69c7c6ee1d4fc1d17 ] Under CONFIG_RCU_EXP_KTHREAD=y, the nodes initialization for expedited grace periods is queued to a kworker. However if the allocation of that kworker failed, the nodes initialization is performed synchronously by the caller instead. Now the check for kworker initialization failure relies on the kworker pointer to be NULL while its value might actually encapsulate an allocation failure error. Make sure to handle this case. Reviewed-by: Kalesh Singh Fixes: 9621fbee44df ("rcu: Move expedited grace period (GP) work to RT kthread_worker") Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9d7464a90f85d..c879ed0c55079 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4471,6 +4471,7 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { pr_err("Failed to create %s!\n", par_gp_kworker_name); + rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); return; } -- GitLab From 267a6af60863490b54658fc239b3e3be5a0b6f92 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:16 +0100 Subject: [PATCH 0739/1418] rcu/exp: Handle RCU expedited grace period kworker allocation failure [ Upstream commit e7539ffc9a770f36bacedcf0fbfb4bf2f244f4a5 ] Just like is done for the kworker performing nodes initialization, gracefully handle the possible allocation failure of the RCU expedited grace period main kworker. While at it perform a rename of the related checking functions to better reflect the expedited specifics. Reviewed-by: Kalesh Singh Fixes: 9621fbee44df ("rcu: Move expedited grace period (GP) work to RT kthread_worker") Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 2 ++ kernel/rcu/tree_exp.h | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c879ed0c55079..61f9503a5fe9c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4465,6 +4465,7 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { pr_err("Failed to create %s!\n", gp_kworker_name); + rcu_exp_gp_kworker = NULL; return; } @@ -4473,6 +4474,7 @@ static void __init rcu_start_exp_gp_kworkers(void) pr_err("Failed to create %s!\n", par_gp_kworker_name); rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); + rcu_exp_gp_kworker = NULL; return; } diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6d2cbed96b462..75e8d9652f7bb 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -427,7 +427,12 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_exp_gp_kworker); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_exp_par_gp_kworker); } @@ -477,7 +482,12 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_gp_wq); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_par_gp_wq); } @@ -540,7 +550,7 @@ static void sync_rcu_exp_select_cpus(void) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - if (!rcu_gp_par_worker_started() || + if (!rcu_exp_par_worker_started() || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { /* No worker started yet or last leaf, do direct call. */ @@ -910,7 +920,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) */ void synchronize_rcu_expedited(void) { - bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); + bool use_worker; unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; @@ -921,6 +931,9 @@ void synchronize_rcu_expedited(void) lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu_expedited() in RCU read-side critical section"); + use_worker = (rcu_scheduler_active != RCU_SCHEDULER_INIT) && + rcu_exp_worker_started(); + /* Is the state is such that the call is a grace period? */ if (rcu_blocking_is_gp()) { // Note well that this code runs with !PREEMPT && !SMP. @@ -950,7 +963,7 @@ void synchronize_rcu_expedited(void) return; /* Someone else did our work for us. */ /* Ensure that load happens before action based on it. */ - if (unlikely(boottime)) { + if (unlikely(!use_worker)) { /* Direct call during scheduler init and early_initcalls(). */ rcu_exp_sel_wait_wake(s); } else { @@ -968,7 +981,7 @@ void synchronize_rcu_expedited(void) /* Let the next expedited grace period start. */ mutex_unlock(&rcu_state.exp_mutex); - if (likely(!boottime)) + if (likely(use_worker)) synchronize_rcu_expedited_destroy_work(&rew); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- GitLab From 96436365e5d80d0106ea785a4f80a58e7c9edff8 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 17 Feb 2024 20:25:38 -0800 Subject: [PATCH 0740/1418] nbd: null check for nla_nest_start [ Upstream commit 31edf4bbe0ba27fd03ac7d87eb2ee3d2a231af6d ] nla_nest_start() may fail and return NULL. Insert a check and set errno based on other call sites within the same source code. Signed-off-by: Navid Emamdoost Reviewed-by: Michal Kubecek Fixes: 47d902b90a32 ("nbd: add a status netlink command") Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240218042534.it.206-kees@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9a53165de4cef..5c4be8dda253c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2408,6 +2408,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) } dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); + if (!dev_list) { + nlmsg_free(reply); + ret = -EMSGSIZE; + goto out; + } + if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) { -- GitLab From 283e38fc7dcefc6cc2272fdaddfd9f65b1cc9a0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2024 21:23:34 +0100 Subject: [PATCH 0741/1418] fs/select: rework stack allocation hack for clang [ Upstream commit ddb9fd7a544088ed70eccbb9f85e9cc9952131c1 ] A while ago, we changed the way that select() and poll() preallocate a temporary buffer just under the size of the static warning limit of 1024 bytes, as clang was frequently going slightly above that limit. The warnings have recently returned and I took another look. As it turns out, clang is not actually inherently worse at reserving stack space, it just happens to inline do_select() into core_sys_select(), while gcc never inlines it. Annotate do_select() to never be inlined and in turn remove the special case for the allocation size. This should give the same behavior for both clang and gcc all the time and once more avoids those warnings. Fixes: ad312f95d41c ("fs/select: avoid clang stack usage warning") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240216202352.2492798-1-arnd@kernel.org Reviewed-by: Kees Cook Reviewed-by: Andi Kleen Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/select.c | 2 +- include/linux/poll.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/select.c b/fs/select.c index 0ee55af1a55c2..d4d881d439dcd 100644 --- a/fs/select.c +++ b/fs/select.c @@ -476,7 +476,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in, wait->_key |= POLLOUT_SET; } -static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) +static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) { ktime_t expire, *to = NULL; struct poll_wqueues table; diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2f..d1ea4f3714a84 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC -- GitLab From d6c28aefe9b46583767b706dbf53e9bcf7552b72 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:40 +0800 Subject: [PATCH 0742/1418] md: Don't clear MD_CLOSING when the raid is about to stop [ Upstream commit 9674f54e41fffaf06f6a60202e1fa4cc13de3cf5 ] The raid should not be opened anymore when it is about to be stopped. However, other processes can open it again if the flag MD_CLOSING is cleared before exiting. From now on, this flag will not be cleared when the raid will be stopped. Fixes: 065e519e71b2 ("md: MD_CLOSING needs to be cleared after called md_set_readonly or do_md_stop") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-6-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1c87f3e708094..788acc81e7a84 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6243,7 +6243,15 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - mddev->flags = 0; + /* + * Don't clear MD_CLOSING, or mddev can be opened again. + * 'hold_active != 0' means mddev is still in the creation + * process and will be used later. + */ + if (mddev->hold_active) + mddev->flags = 0; + else + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -7571,7 +7579,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) return -ENOTTY; @@ -7658,7 +7665,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7821,7 +7827,7 @@ unlock: mddev->hold_active = 0; mddev_unlock(mddev); out: - if(did_set_md_closing) + if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY)) clear_bit(MD_CLOSING, &mddev->flags); return err; } -- GitLab From eaf5eaa4064da1c69c2eb5fb500df5e19b4bd564 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:15 +0800 Subject: [PATCH 0743/1418] lib/cmdline: Fix an invalid format specifier in an assertion msg [ Upstream commit d2733a026fc7247ba42d7a8e1b737cf14bf1df21 ] The correct format specifier for p - n (both p and n are pointers) is %td, as the type should be ptrdiff_t. This was discovered by annotating KUnit assertion macros with gcc's printf specifier, but note that gcc incorrectly suggested a %d or %ld specifier (depending on the pointer size of the architecture being built). Fixes: 0ea09083116d ("lib/cmdline: Allow get_options() to take 0 to validate the input") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Daniel Latypov Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- lib/cmdline_kunit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c index d4572dbc91453..705b82736be08 100644 --- a/lib/cmdline_kunit.c +++ b/lib/cmdline_kunit.c @@ -124,7 +124,7 @@ static void cmdline_do_one_range_test(struct kunit *test, const char *in, n, e[0], r[0]); p = memchr_inv(&r[1], 0, sizeof(r) - sizeof(r[0])); - KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %u out of bound", n, p - r); + KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %td out of bound", n, p - r); } static void cmdline_test_range(struct kunit *test) -- GitLab From 188e9aff68875f757ee1577daa30a046dcd5667b Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:16 +0800 Subject: [PATCH 0744/1418] lib: memcpy_kunit: Fix an invalid format specifier in an assertion msg [ Upstream commit 0a549ed22c3c7cc6da5c5f5918efd019944489a5 ] The 'i' passed as an assertion message is a size_t, so should use '%zu', not '%d'. This was found by annotating the _MSG() variants of KUnit's assertions to let gcc validate the format strings. Fixes: bb95ebbe89a7 ("lib: Introduce CONFIG_MEMCPY_KUNIT_TEST") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- lib/memcpy_kunit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 2b5cc70ac53fc..dbedd99aa6163 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -32,7 +32,7 @@ struct some_bytes { BUILD_BUG_ON(sizeof(instance.data) != 32); \ for (size_t i = 0; i < sizeof(instance.data); i++) { \ KUNIT_ASSERT_EQ_MSG(test, instance.data[i], v, \ - "line %d: '%s' not initialized to 0x%02x @ %d (saw 0x%02x)\n", \ + "line %d: '%s' not initialized to 0x%02x @ %zu (saw 0x%02x)\n", \ __LINE__, #instance, v, i, instance.data[i]); \ } \ } while (0) @@ -41,7 +41,7 @@ struct some_bytes { BUILD_BUG_ON(sizeof(one) != sizeof(two)); \ for (size_t i = 0; i < sizeof(one); i++) { \ KUNIT_EXPECT_EQ_MSG(test, one.data[i], two.data[i], \ - "line %d: %s.data[%d] (0x%02x) != %s.data[%d] (0x%02x)\n", \ + "line %d: %s.data[%zu] (0x%02x) != %s.data[%zu] (0x%02x)\n", \ __LINE__, #one, i, one.data[i], #two, i, two.data[i]); \ } \ kunit_info(test, "ok: " TEST_OP "() " name "\n"); \ -- GitLab From d12ffa03085e580b12a67068472134c980cd2e47 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:17 +0800 Subject: [PATCH 0745/1418] time: test: Fix incorrect format specifier [ Upstream commit 133e267ef4a26d19c93996a874714e9f3f8c70aa ] 'days' is a s64 (from div_s64), and so should use a %lld specifier. This was found by extending KUnit's assertion macros to use gcc's __printf attribute. Fixes: 276010551664 ("time: Improve performance of time64_to_tm()") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- kernel/time/time_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c index 831e8e779acef..f7c3de01197c9 100644 --- a/kernel/time/time_test.c +++ b/kernel/time/time_test.c @@ -73,7 +73,7 @@ static void time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %lld", \ year, month, mdday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); -- GitLab From 28e7fd1c70a203303203d1246135bcedc45b4d6c Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:18 +0800 Subject: [PATCH 0746/1418] rtc: test: Fix invalid format specifier. [ Upstream commit 8a904a3caa88118744062e872ae90f37748a8fd8 ] 'days' is a s64 (from div_s64), and so should use a %lld specifier. This was found by extending KUnit's assertion macros to use gcc's __printf attribute. Fixes: 1d1bb12a8b18 ("rtc: Improve performance of rtc_time64_to_tm(). Add tests.") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Acked-by: Alexandre Belloni Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- drivers/rtc/lib_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c index d5caf36c56cdc..225c859d6da55 100644 --- a/drivers/rtc/lib_test.c +++ b/drivers/rtc/lib_test.c @@ -54,7 +54,7 @@ static void rtc_time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \ year, month, mday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); -- GitLab From a34fba8c44ac11a354535f93f60b4db4706ca2be Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Feb 2024 14:16:47 -0700 Subject: [PATCH 0747/1418] io_uring/net: unify how recvmsg and sendmsg copy in the msghdr [ Upstream commit 52307ac4f2b507f60bae6df5be938d35e199c688 ] For recvmsg, we roll our own since we support buffer selections. This isn't the case for sendmsg right now, but in preparation for doing so, make the recvmsg copy helpers generic so we can call them from the sendmsg side as well. Signed-off-by: Jens Axboe Stable-dep-of: 8ede3db5061b ("io_uring/net: fix overflow check in io_recvmsg_mshot_prep()") Signed-off-by: Sasha Levin --- io_uring/net.c | 271 ++++++++++++++++++++++++++----------------------- 1 file changed, 142 insertions(+), 129 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index c062ce66af12c..c770961079749 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,16 +183,150 @@ static int io_setup_async_msg(struct io_kiocb *req, return -EAGAIN; } +static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) +{ + int hdr; + + if (iomsg->namelen < 0) + return true; + if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), + iomsg->namelen, &hdr)) + return true; + if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) + return true; + + return false; +} + +#ifdef CONFIG_COMPAT +static int __io_compat_msg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + struct sockaddr __user **addr, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct compat_msghdr msg; + struct compat_iovec __user *uiov; + int ret; + + if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) + return -EFAULT; + + ret = __get_compat_msghdr(&iomsg->msg, &msg, addr); + if (ret) + return ret; + + uiov = compat_ptr(msg.msg_iov); + if (req->flags & REQ_F_BUFFER_SELECT) { + compat_ssize_t clen; + + iomsg->free_iov = NULL; + if (msg.msg_iovlen == 0) { + sr->len = 0; + } else if (msg.msg_iovlen > 1) { + return -EINVAL; + } else { + if (!access_ok(uiov, sizeof(*uiov))) + return -EFAULT; + if (__get_user(clen, &uiov->iov_len)) + return -EFAULT; + if (clen < 0) + return -EINVAL; + sr->len = clen; + } + + if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { + iomsg->namelen = msg.msg_namelen; + iomsg->controllen = msg.msg_controllen; + if (io_recvmsg_multishot_overflow(iomsg)) + return -EOVERFLOW; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg.msg_iovlen, + UIO_FASTIOV, &iomsg->free_iov, + &iomsg->msg.msg_iter, true); + if (unlikely(ret < 0)) + return ret; + + return 0; +} +#endif + +static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct sockaddr __user **addr, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct user_msghdr msg; + int ret; + + if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) + return -EFAULT; + + ret = __copy_msghdr(&iomsg->msg, &msg, addr); + if (ret) + return ret; + + if (req->flags & REQ_F_BUFFER_SELECT) { + if (msg.msg_iovlen == 0) { + sr->len = iomsg->fast_iov[0].iov_len = 0; + iomsg->fast_iov[0].iov_base = NULL; + iomsg->free_iov = NULL; + } else if (msg.msg_iovlen > 1) { + return -EINVAL; + } else { + if (copy_from_user(iomsg->fast_iov, msg.msg_iov, + sizeof(*msg.msg_iov))) + return -EFAULT; + sr->len = iomsg->fast_iov[0].iov_len; + iomsg->free_iov = NULL; + } + + if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { + iomsg->namelen = msg.msg_namelen; + iomsg->controllen = msg.msg_controllen; + if (io_recvmsg_multishot_overflow(iomsg)) + return -EOVERFLOW; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, + &iomsg->free_iov, &iomsg->msg.msg_iter, false); + if (unlikely(ret < 0)) + return ret; + + return 0; +} + +static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct sockaddr __user **addr, int ddir) +{ + iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + return __io_compat_msg_copy_hdr(req, iomsg, addr, ddir); +#endif + + return __io_msg_copy_hdr(req, iomsg, addr, ddir); +} + static int io_sendmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); int ret; - iomsg->msg.msg_name = &iomsg->addr; - iomsg->free_iov = iomsg->fast_iov; - ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, - &iomsg->free_iov); + ret = io_msg_copy_hdr(req, iomsg, NULL, ITER_SOURCE); + if (ret) + return ret; + /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = iomsg->msg.msg_control_user; return ret; @@ -415,142 +549,21 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) -{ - int hdr; - - if (iomsg->namelen < 0) - return true; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), - iomsg->namelen, &hdr)) - return true; - if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) - return true; - - return false; -} - -static int __io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; - int ret; - - if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) - return -EFAULT; - - ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - if (req->flags & REQ_F_BUFFER_SELECT) { - if (msg.msg_iovlen == 0) { - sr->len = iomsg->fast_iov[0].iov_len = 0; - iomsg->fast_iov[0].iov_base = NULL; - iomsg->free_iov = NULL; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) - return -EFAULT; - sr->len = iomsg->fast_iov[0].iov_len; - iomsg->free_iov = NULL; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, - &iomsg->free_iov, &iomsg->msg.msg_iter, - false); - if (ret > 0) - ret = 0; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct compat_msghdr msg; - struct compat_iovec __user *uiov; - int ret; - - if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) - return -EFAULT; - - ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - uiov = compat_ptr(msg.msg_iov); - if (req->flags & REQ_F_BUFFER_SELECT) { - compat_ssize_t clen; - - iomsg->free_iov = NULL; - if (msg.msg_iovlen == 0) { - sr->len = 0; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (!access_ok(uiov, sizeof(*uiov))) - return -EFAULT; - if (__get_user(clen, &uiov->iov_len)) - return -EFAULT; - if (clen < 0) - return -EINVAL; - sr->len = clen; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen, - UIO_FASTIOV, &iomsg->free_iov, - &iomsg->msg.msg_iter, true); - if (ret < 0) - return ret; - } - - return 0; -} -#endif - static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { - iomsg->msg.msg_name = &iomsg->addr; - iomsg->msg.msg_iter.nr_segs = 0; - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_recvmsg_copy_hdr(req, iomsg); -#endif - - return __io_recvmsg_copy_hdr(req, iomsg); + return io_msg_copy_hdr(req, iomsg, &iomsg->uaddr, ITER_DEST); } int io_recvmsg_prep_async(struct io_kiocb *req) { + struct io_async_msghdr *iomsg; int ret; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; - ret = io_recvmsg_copy_hdr(req, req->async_data); + iomsg = req->async_data; + ret = io_recvmsg_copy_hdr(req, iomsg); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; -- GitLab From aba7b2140e82196caae71d07a1397d2d5afbffed Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Feb 2024 11:09:20 -0700 Subject: [PATCH 0748/1418] io_uring/net: move receive multishot out of the generic msghdr path [ Upstream commit c55978024d123d43808ab393a0a4ce3ce8568150 ] Move the actual user_msghdr / compat_msghdr into the send and receive sides, respectively, so we can move the uaddr receive handling into its own handler, and ditto the multishot with buffer selection logic. Signed-off-by: Jens Axboe Stable-dep-of: 8ede3db5061b ("io_uring/net: fix overflow check in io_recvmsg_mshot_prep()") Signed-off-by: Sasha Levin --- io_uring/net.c | 161 ++++++++++++++++++++++++++++--------------------- 1 file changed, 91 insertions(+), 70 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index c770961079749..b273914ed99f0 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,46 +183,26 @@ static int io_setup_async_msg(struct io_kiocb *req, return -EAGAIN; } -static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) -{ - int hdr; - - if (iomsg->namelen < 0) - return true; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), - iomsg->namelen, &hdr)) - return true; - if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) - return true; - - return false; -} - #ifdef CONFIG_COMPAT -static int __io_compat_msg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg, - struct sockaddr __user **addr, int ddir) +static int io_compat_msg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + struct compat_msghdr *msg, int ddir) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct compat_msghdr msg; struct compat_iovec __user *uiov; int ret; - if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) + if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) return -EFAULT; - ret = __get_compat_msghdr(&iomsg->msg, &msg, addr); - if (ret) - return ret; - - uiov = compat_ptr(msg.msg_iov); + uiov = compat_ptr(msg->msg_iov); if (req->flags & REQ_F_BUFFER_SELECT) { compat_ssize_t clen; iomsg->free_iov = NULL; - if (msg.msg_iovlen == 0) { + if (msg->msg_iovlen == 0) { sr->len = 0; - } else if (msg.msg_iovlen > 1) { + } else if (msg->msg_iovlen > 1) { return -EINVAL; } else { if (!access_ok(uiov, sizeof(*uiov))) @@ -234,18 +214,11 @@ static int __io_compat_msg_copy_hdr(struct io_kiocb *req, sr->len = clen; } - if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - return 0; } iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg.msg_iovlen, + ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, true); if (unlikely(ret < 0)) @@ -255,47 +228,35 @@ static int __io_compat_msg_copy_hdr(struct io_kiocb *req, } #endif -static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, - struct sockaddr __user **addr, int ddir) +static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct user_msghdr *msg, int ddir) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; int ret; - if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) + if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg))) return -EFAULT; - ret = __copy_msghdr(&iomsg->msg, &msg, addr); - if (ret) - return ret; - if (req->flags & REQ_F_BUFFER_SELECT) { - if (msg.msg_iovlen == 0) { + if (msg->msg_iovlen == 0) { sr->len = iomsg->fast_iov[0].iov_len = 0; iomsg->fast_iov[0].iov_base = NULL; iomsg->free_iov = NULL; - } else if (msg.msg_iovlen > 1) { + } else if (msg->msg_iovlen > 1) { return -EINVAL; } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, - sizeof(*msg.msg_iov))) + if (copy_from_user(iomsg->fast_iov, msg->msg_iov, + sizeof(*msg->msg_iov))) return -EFAULT; sr->len = iomsg->fast_iov[0].iov_len; iomsg->free_iov = NULL; } - if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - return 0; } iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ddir, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, + ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, false); if (unlikely(ret < 0)) return ret; @@ -303,30 +264,34 @@ static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg return 0; } -static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, - struct sockaddr __user **addr, int ddir) +static int io_sendmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct user_msghdr msg; + int ret; + iomsg->msg.msg_name = &iomsg->addr; iomsg->msg.msg_iter.nr_segs = 0; #ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_msg_copy_hdr(req, iomsg, addr, ddir); -#endif + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; - return __io_msg_copy_hdr(req, iomsg, addr, ddir); -} + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); + if (unlikely(ret)) + return ret; -static int io_sendmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - int ret; + return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + } +#endif - ret = io_msg_copy_hdr(req, iomsg, NULL, ITER_SOURCE); - if (ret) + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); + if (unlikely(ret)) return ret; + ret = __copy_msghdr(&iomsg->msg, &msg, NULL); + /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = iomsg->msg.msg_control_user; return ret; @@ -549,10 +514,66 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +static int io_recvmsg_mshot_prep(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + size_t namelen, size_t controllen) +{ + if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == + (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { + int hdr; + + if (unlikely(namelen < 0)) + return -EOVERFLOW; + if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), + namelen, &hdr)) + return -EOVERFLOW; + if (check_add_overflow(hdr, (int)controllen, &hdr)) + return -EOVERFLOW; + + iomsg->namelen = namelen; + iomsg->controllen = controllen; + return 0; + } + + return 0; +} + static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { - return io_msg_copy_hdr(req, iomsg, &iomsg->uaddr, ITER_DEST); + struct user_msghdr msg; + int ret; + + iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; + +#ifdef CONFIG_COMPAT + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; + + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); + if (unlikely(ret)) + return ret; + + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; + + return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, + cmsg.msg_controllen); + } +#endif + + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); + if (unlikely(ret)) + return ret; + + ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; + + return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, + msg.msg_controllen); } int io_recvmsg_prep_async(struct io_kiocb *req) -- GitLab From 868ec868616438df487b9e2baa5a99f8662cc47c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Mar 2024 18:29:39 +0300 Subject: [PATCH 0749/1418] io_uring/net: fix overflow check in io_recvmsg_mshot_prep() [ Upstream commit 8ede3db5061bb1fe28e2c9683329aafa89d2b1b4 ] The "controllen" variable is type size_t (unsigned long). Casting it to int could lead to an integer underflow. The check_add_overflow() function considers the type of the destination which is type int. If we add two positive values and the result cannot fit in an integer then that's counted as an overflow. However, if we cast "controllen" to an int and it turns negative, then negative values *can* fit into an int type so there is no overflow. Good: 100 + (unsigned long)-4 = 96 <-- overflow Bad: 100 + (int)-4 = 96 <-- no overflow I deleted the cast of the sizeof() as well. That's not a bug but the cast is unnecessary. Fixes: 9b0fc3c054ff ("io_uring: fix types in io_recvmsg_multishot_overflow") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/138bd2e2-ede8-4bcc-aa7b-f3d9de167a37@moroto.mountain Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index b273914ed99f0..9fc0ffb0b6c12 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -524,10 +524,10 @@ static int io_recvmsg_mshot_prep(struct io_kiocb *req, if (unlikely(namelen < 0)) return -EOVERFLOW; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), + if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), namelen, &hdr)) return -EOVERFLOW; - if (check_add_overflow(hdr, (int)controllen, &hdr)) + if (check_add_overflow(hdr, controllen, &hdr)) return -EOVERFLOW; iomsg->namelen = namelen; -- GitLab From 74ca3ef68d2f449bc848c0a814cefc487bf755fa Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Tue, 5 Mar 2024 16:20:48 +0800 Subject: [PATCH 0750/1418] aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts [ Upstream commit f98364e926626c678fb4b9004b75cacf92ff0662 ] This patch is against CVE-2023-6270. The description of cve is: A flaw was found in the ATA over Ethernet (AoE) driver in the Linux kernel. The aoecmd_cfg_pkts() function improperly updates the refcnt on `struct net_device`, and a use-after-free can be triggered by racing between the free on the struct and the access through the `skbtxq` global queue. This could lead to a denial of service condition or potential code execution. In aoecmd_cfg_pkts(), it always calls dev_put(ifp) when skb initial code is finished. But the net_device ifp will still be used in later tx()->dev_queue_xmit() in kthread. Which means that the dev_put(ifp) should NOT be called in the success path of skb initial code in aoecmd_cfg_pkts(). Otherwise tx() may run into use-after-free because the net_device is freed. This patch removed the dev_put(ifp) in the success path in aoecmd_cfg_pkts(), and added dev_put() after skb xmit in tx(). Link: https://nvd.nist.gov/vuln/detail/CVE-2023-6270 Fixes: 7562f876cd93 ("[NET]: Rework dev_base via list_head (v3)") Signed-off-by: Chun-Yi Lee Link: https://lore.kernel.org/r/20240305082048.25526-1-jlee@suse.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/aoe/aoecmd.c | 12 ++++++------ drivers/block/aoe/aoenet.c | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d7317425be510..cc9077b588d7e 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu rcu_read_lock(); for_each_netdev_rcu(&init_net, ifp) { dev_hold(ifp); - if (!is_aoe_netif(ifp)) - goto cont; + if (!is_aoe_netif(ifp)) { + dev_put(ifp); + continue; + } skb = new_skb(sizeof *h + sizeof *ch); if (skb == NULL) { printk(KERN_INFO "aoe: skb alloc failure\n"); - goto cont; + dev_put(ifp); + continue; } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; @@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu h->major = cpu_to_be16(aoemajor); h->minor = aoeminor; h->cmd = AOECMD_CFG; - -cont: - dev_put(ifp); } rcu_read_unlock(); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 63773a90581dd..1e66c7a188a12 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -64,6 +64,7 @@ tx(int id) __must_hold(&txlock) pr_warn("aoe: packet could not be sent on %s. %s\n", ifp ? ifp->name : "netif", "consider increasing tx_queue_len"); + dev_put(ifp); spin_lock_irq(&txlock); } return 0; -- GitLab From f0439b7d67b827615a52dad23a2289658a65b9a3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 22 Jan 2024 10:08:07 -0800 Subject: [PATCH 0751/1418] x86/resctrl: Implement new mba_MBps throttling heuristic [ Upstream commit c2427e70c1630d98966375fffc2b713ab9768a94 ] The mba_MBps feedback loop increases throttling when a group is using more bandwidth than the target set by the user in the schemata file, and decreases throttling when below target. To avoid possibly stepping throttling up and down on every poll a flag "delta_comp" is set whenever throttling is changed to indicate that the actual change in bandwidth should be recorded on the next poll in "delta_bw". Throttling is only reduced if the current bandwidth plus delta_bw is below the user target. This algorithm works well if the workload has steady bandwidth needs. But it can go badly wrong if the workload moves to a different phase just as the throttling level changed. E.g. if the workload becomes essentially idle right as throttling level is increased, the value calculated for delta_bw will be more or less the old bandwidth level. If the workload then resumes, Linux may never reduce throttling because current bandwidth plus delta_bw is above the target set by the user. Implement a simpler heuristic by assuming that in the worst case the currently measured bandwidth is being controlled by the current level of throttling. Compute how much it may increase if throttling is relaxed to the next higher level. If that is still below the user target, then it is ok to reduce the amount of throttling. Fixes: ba0f26d8529c ("x86/intel_rdt/mba_sc: Prepare for feedback loop") Reported-by: Xiaochen Shen Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Xiaochen Shen Link: https://lore.kernel.org/r/20240122180807.70518-1-tony.luck@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/resctrl/internal.h | 4 --- arch/x86/kernel/cpu/resctrl/monitor.c | 42 ++++++-------------------- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 0b5c6c76f6f7b..4761d489a117a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -281,14 +281,10 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @prev_bw_bytes: Previous bytes value read for bandwidth calculation * @prev_bw: The most recent bandwidth in MBps - * @delta_bw: Difference between the current and previous bandwidth - * @delta_comp: Indicates whether to compute the delta_bw */ struct mbm_state { u64 prev_bw_bytes; u32 prev_bw; - u32 delta_bw; - bool delta_comp; }; /** diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 77538abeb72af..b9adb707750c6 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -428,9 +428,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) cur_bw = bytes / SZ_1M; - if (m->delta_comp) - m->delta_bw = abs(cur_bw - m->prev_bw); - m->delta_comp = false; m->prev_bw = cur_bw; } @@ -508,11 +505,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; - u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; struct list_head *head; struct rdtgroup *entry; + u32 cur_bw, user_bw; if (!is_mbm_local_enabled()) return; @@ -531,7 +528,6 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) cur_bw = pmbm_data->prev_bw; user_bw = dom_mba->mbps_val[closid]; - delta_bw = pmbm_data->delta_bw; /* MBA resource doesn't support CDP */ cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); @@ -543,49 +539,31 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) list_for_each_entry(entry, head, mon.crdtgrp_list) { cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; cur_bw += cmbm_data->prev_bw; - delta_bw += cmbm_data->delta_bw; } /* * Scale up/down the bandwidth linearly for the ctrl group. The * bandwidth step is the bandwidth granularity specified by the * hardware. - * - * The delta_bw is used when increasing the bandwidth so that we - * dont alternately increase and decrease the control values - * continuously. - * - * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if - * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep - * switching between 90 and 110 continuously if we only check - * cur_bw < user_bw. + * Always increase throttling if current bandwidth is above the + * target set by user. + * But avoid thrashing up and down on every poll by checking + * whether a decrease in throttling is likely to push the group + * back over target. E.g. if currently throttling to 30% of bandwidth + * on a system with 10% granularity steps, check whether moving to + * 40% would go past the limit by multiplying current bandwidth by + * "(30 + 10) / 30". */ if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { new_msr_val = cur_msr_val - r_mba->membw.bw_gran; } else if (cur_msr_val < MAX_MBA_BW && - (user_bw > (cur_bw + delta_bw))) { + (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) { new_msr_val = cur_msr_val + r_mba->membw.bw_gran; } else { return; } resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); - - /* - * Delta values are updated dynamically package wise for each - * rdtgrp every time the throttle MSR changes value. - * - * This is because (1)the increase in bandwidth is not perfectly - * linear and only "approximately" linear even when the hardware - * says it is linear.(2)Also since MBA is a core specific - * mechanism, the delta values vary based on number of cores used - * by the rdtgrp. - */ - pmbm_data->delta_comp = true; - list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; - cmbm_data->delta_comp = true; - } } static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) -- GitLab From 3e5e8248c0a4cd9d6e75208b21ff31ce5265e906 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Jan 2024 17:39:19 +0100 Subject: [PATCH 0752/1418] x86/sme: Fix memory encryption setting if enabled by default and not overridden [ Upstream commit e814b59e6c2b11f5a3d007b2e61f7d550c354c3a ] Commit cbebd68f59f0 ("x86/mm: Fix use of uninitialized buffer in sme_enable()") 'fixed' an issue in sme_enable() detected by static analysis, and broke the common case in the process. cmdline_find_option() will return < 0 on an error, or when the command line argument does not appear at all. In this particular case, the latter is not an error condition, and so the early exit is wrong. Instead, without mem_encrypt= on the command line, the compile time default should be honoured, which could be to enable memory encryption, and this is currently broken. Fix it by setting sme_me_mask to a preliminary value based on the compile time default, and only omitting the command line argument test when cmdline_find_option() returns an error. [ bp: Drop active_by_default while at it. ] Fixes: cbebd68f59f0 ("x86/mm: Fix use of uninitialized buffer in sme_enable()") Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240126163918.2908990-2-ardb+git@google.com Signed-off-by: Sasha Levin --- arch/x86/mm/mem_encrypt_identity.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index d94ebd8acdfde..a11a6ebbf5ecf 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -507,7 +507,6 @@ void __init sme_enable(struct boot_params *bp) const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; - bool active_by_default; unsigned long me_mask; char buffer[16]; bool snp; @@ -593,22 +592,19 @@ void __init sme_enable(struct boot_params *bp) : "p" (sme_cmdline_off)); if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) - active_by_default = true; - else - active_by_default = false; + sme_me_mask = me_mask; cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) - return; + goto out; if (!strncmp(buffer, cmdline_on, sizeof(buffer))) sme_me_mask = me_mask; else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) sme_me_mask = 0; - else - sme_me_mask = active_by_default ? me_mask : 0; + out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; -- GitLab From 081bf64a7e9ad999a833bfb4d5412685f8818e9a Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Mon, 18 Dec 2023 08:38:39 +0100 Subject: [PATCH 0753/1418] timekeeping: Fix cross-timestamp interpolation on counter wrap [ Upstream commit 84dccadd3e2a3f1a373826ad71e5ced5e76b0c00 ] cycle_between() decides whether get_device_system_crosststamp() will interpolate for older counter readings. cycle_between() yields wrong results for a counter wrap-around where after < before < test, and for the case after < test < before. Fix the comparison logic. Fixes: 2c756feb18d9 ("time: Add history to cross timestamp interface supporting slower devices") Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/20231218073849.35294-2-peter.hilber@opensynergy.com Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 221c8c404973a..c168931c78e01 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1186,7 +1186,7 @@ static bool cycle_between(u64 before, u64 test, u64 after) { if (test > before && test < after) return true; - if (test < before && before > after) + if (before > after && (test > before || test < after)) return true; return false; } -- GitLab From 8a1d2ecd9bb816f515f069e9cc0a04fddf2a8872 Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Mon, 18 Dec 2023 08:38:40 +0100 Subject: [PATCH 0754/1418] timekeeping: Fix cross-timestamp interpolation corner case decision [ Upstream commit 87a41130881995f82f7adbafbfeddaebfb35f0ef ] The cycle_between() helper checks if parameter test is in the open interval (before, after). Colloquially speaking, this also applies to the counter wrap-around special case before > after. get_device_system_crosststamp() currently uses cycle_between() at the first call site to decide whether to interpolate for older counter readings. get_device_system_crosststamp() has the following problem with cycle_between() testing against an open interval: Assume that, by chance, cycles == tk->tkr_mono.cycle_last (in the following, "cycle_last" for brevity). Then, cycle_between() at the first call site, with effective argument values cycle_between(cycle_last, cycles, now), returns false, enabling interpolation. During interpolation, get_device_system_crosststamp() will then call cycle_between() at the second call site (if a history_begin was supplied). The effective argument values are cycle_between(history_begin->cycles, cycles, cycles), since system_counterval.cycles == interval_start == cycles, per the assumption. Due to the test against the open interval, cycle_between() returns false again. This causes get_device_system_crosststamp() to return -EINVAL. This failure should be avoided, since get_device_system_crosststamp() works both when cycles follows cycle_last (no interpolation), and when cycles precedes cycle_last (interpolation). For the case cycles == cycle_last, interpolation is actually unneeded. Fix this by changing cycle_between() into timestamp_in_interval(), which now checks against the closed interval, rather than the open interval. This changes the get_device_system_crosststamp() behavior for three corner cases: 1. Bypass interpolation in the case cycles == tk->tkr_mono.cycle_last, fixing the problem described above. 2. At the first timestamp_in_interval() call site, cycles == now no longer causes failure. 3. At the second timestamp_in_interval() call site, history_begin->cycles == system_counterval.cycles no longer causes failure. adjust_historical_crosststamp() also works for this corner case, where partial_history_cycles == total_history_cycles. These behavioral changes should not cause any problems. Fixes: 2c756feb18d9 ("time: Add history to cross timestamp interface supporting slower devices") Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231218073849.35294-3-peter.hilber@opensynergy.com Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c168931c78e01..1749a712f72d1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1180,13 +1180,15 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, } /* - * cycle_between - true if test occurs chronologically between before and after + * timestamp_in_interval - true if ts is chronologically in [start, end] + * + * True if ts occurs chronologically at or after start, and before or at end. */ -static bool cycle_between(u64 before, u64 test, u64 after) +static bool timestamp_in_interval(u64 start, u64 end, u64 ts) { - if (test > before && test < after) + if (ts >= start && ts <= end) return true; - if (before > after && (test > before || test < after)) + if (start > end && (ts >= start || ts <= end)) return true; return false; } @@ -1246,7 +1248,7 @@ int get_device_system_crosststamp(int (*get_time_fn) */ now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; - if (!cycle_between(interval_start, cycles, now)) { + if (!timestamp_in_interval(interval_start, now, cycles)) { clock_was_set_seq = tk->clock_was_set_seq; cs_was_changed_seq = tk->cs_was_changed_seq; cycles = interval_start; @@ -1277,13 +1279,13 @@ int get_device_system_crosststamp(int (*get_time_fn) bool discontinuity; /* - * Check that the counter value occurs after the provided + * Check that the counter value is not before the provided * history reference and that the history doesn't cross a * clocksource change */ if (!history_begin || - !cycle_between(history_begin->cycles, - system_counterval.cycles, cycles) || + !timestamp_in_interval(history_begin->cycles, + cycles, system_counterval.cycles) || history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL; partial_history_cycles = cycles - system_counterval.cycles; -- GitLab From 9388721260f6e54ebc2e1e65b64f52c072ed7f83 Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Mon, 18 Dec 2023 08:38:41 +0100 Subject: [PATCH 0755/1418] timekeeping: Fix cross-timestamp interpolation for non-x86 [ Upstream commit 14274d0bd31b4debf28284604589f596ad2e99f2 ] So far, get_device_system_crosststamp() unconditionally passes system_counterval.cycles to timekeeping_cycles_to_ns(). But when interpolating system time (do_interp == true), system_counterval.cycles is before tkr_mono.cycle_last, contrary to the timekeeping_cycles_to_ns() expectations. On x86, CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE will mitigate on interpolating, setting delta to 0. With delta == 0, xtstamp->sys_monoraw and xtstamp->sys_realtime are then set to the last update time, as implicitly expected by adjust_historical_crosststamp(). On other architectures, the resulting nonsense xtstamp->sys_monoraw and xtstamp->sys_realtime corrupt the xtstamp (ts) adjustment in adjust_historical_crosststamp(). Fix this by deriving xtstamp->sys_monoraw and xtstamp->sys_realtime from the last update time when interpolating, by using the local variable "cycles". The local variable already has the right value when interpolating, unlike system_counterval.cycles. Fixes: 2c756feb18d9 ("time: Add history to cross timestamp interface supporting slower devices") Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/20231218073849.35294-4-peter.hilber@opensynergy.com Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1749a712f72d1..b158cbef4d8dc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1261,10 +1261,8 @@ int get_device_system_crosststamp(int (*get_time_fn) tk_core.timekeeper.offs_real); base_raw = tk->tkr_raw.base; - nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, - system_counterval.cycles); - nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, - system_counterval.cycles); + nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); + nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); } while (read_seqcount_retry(&tk_core.seq, seq)); xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); -- GitLab From 790ae577eff350695ba59eb7eafcce415d0a702b Mon Sep 17 00:00:00 2001 From: Keisuke Nishimura Date: Wed, 10 Jan 2024 14:17:06 +0100 Subject: [PATCH 0756/1418] sched/fair: Take the scheduling domain into account in select_idle_smt() [ Upstream commit 8aeaffef8c6eceab0e1498486fdd4f3dc3b7066c ] When picking a CPU on task wakeup, select_idle_smt() has to take into account the scheduling domain of @target. This is because the "isolcpus" kernel command line option can remove CPUs from the domain to isolate them from other SMT siblings. This fix checks if the candidate CPU is in the target scheduling domain. Commit: df3cb4ea1fb6 ("sched/fair: Fix wrong cpu selecting from isolated domain") ... originally introduced this fix by adding the check of the scheduling domain in the loop. However, commit: 3e6efe87cd5cc ("sched/fair: Remove redundant check in select_idle_smt()") ... accidentally removed the check. Bring it back. Fixes: 3e6efe87cd5c ("sched/fair: Remove redundant check in select_idle_smt()") Signed-off-by: Keisuke Nishimura Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240110131707.437301-1-keisuke.nishimura@inria.fr Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2558ab9033bee..1c4e54fffb8b6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6678,13 +6678,19 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu /* * Scan the local SMT mask for idle CPUs. */ -static int select_idle_smt(struct task_struct *p, int target) +static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { int cpu; for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { if (cpu == target) continue; + /* + * Check if the CPU is in the LLC scheduling domain of @target. + * Due to isolcpus, there is no guarantee that all the siblings are in the domain. + */ + if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) + continue; if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) return cpu; } @@ -6708,7 +6714,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma return __select_idle_cpu(core, p); } -static inline int select_idle_smt(struct task_struct *p, int target) +static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { return -1; } @@ -6970,7 +6976,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) has_idle_core = test_idle_cores(target); if (!has_idle_core && cpus_share_cache(prev, target)) { - i = select_idle_smt(p, prev); + i = select_idle_smt(p, sd, prev); if ((unsigned int)i < nr_cpumask_bits) return i; } -- GitLab From 13fa3326efb257f0fa6aaf17ef509de79e47e301 Mon Sep 17 00:00:00 2001 From: Keisuke Nishimura Date: Wed, 10 Jan 2024 14:17:07 +0100 Subject: [PATCH 0757/1418] sched/fair: Take the scheduling domain into account in select_idle_core() [ Upstream commit 23d04d8c6b8ec339057264659b7834027f3e6a63 ] When picking a CPU on task wakeup, select_idle_core() has to take into account the scheduling domain where the function looks for the CPU. This is because the "isolcpus" kernel command line option can remove CPUs from the domain to isolate them from other SMT siblings. This change replaces the set of CPUs allowed to run the task from p->cpus_ptr by the intersection of p->cpus_ptr and sched_domain_span(sd) which is stored in the 'cpus' argument provided by select_idle_cpu(). Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()") Signed-off-by: Keisuke Nishimura Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240110131707.437301-2-keisuke.nishimura@inria.fr Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c4e54fffb8b6..91c101ecfef9f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6656,7 +6656,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu if (!available_idle_cpu(cpu)) { idle = false; if (*idle_cpu == -1) { - if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) { *idle_cpu = cpu; break; } @@ -6664,7 +6664,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu } break; } - if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr)) + if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus)) *idle_cpu = cpu; } -- GitLab From 90f089d77e38db1c48629f111f3c8c336be1bc38 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Sun, 17 Dec 2023 13:29:01 +0200 Subject: [PATCH 0758/1418] wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() [ Upstream commit ad25ee36f00172f7d53242dc77c69fff7ced0755 ] We should check whether the WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT tlv is present before accessing it, otherwise a null pointer deference error will occur. Fixes: dc405152bb64 ("ath10k: handle mgmt tx completion event") Signed-off-by: Xingyuan Mo Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20231208043433.271449-1-hdthky0@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 876410a47d1d2..4d5009604eee7 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -844,6 +844,10 @@ ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev(struct ath10k *ar, struct sk_buff *skb, } ev = tb[WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT]; + if (!ev) { + kfree(tb); + return -EPROTO; + } arg->desc_id = ev->desc_id; arg->status = ev->status; -- GitLab From c67698325c68f8768db858f5c87c34823421746d Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:33 +0000 Subject: [PATCH 0759/1418] wifi: b43: Stop/wake correct queue in DMA Tx path when QoS is disabled [ Upstream commit 9636951e4468f02c72cc75a82dc65d003077edbc ] When QoS is disabled, the queue priority value will not map to the correct ieee80211 queue since there is only one queue. Stop/wake queue 0 when QoS is disabled to prevent trying to stop/wake a non-existent queue and failing to stop/wake the actual queue instantiated. Log of issue before change (with kernel parameter qos=0): [ +5.112651] ------------[ cut here ]------------ [ +0.000005] WARNING: CPU: 7 PID: 25513 at net/mac80211/util.c:449 __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000067] Modules linked in: b43(O) snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_chain_nat xt_MASQUERADE nf_nat xfrm_user xfrm_algo xt_addrtype overlay ccm af_packet amdgpu snd_hda_codec_cirrus snd_hda_codec_generic ledtrig_audio drm_exec amdxcp gpu_sched xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6t_rpfilter ipt_rpfilter xt_pkttype xt_LOG nf_log_syslog xt_tcpudp nft_compat nf_tables nfnetlink sch_fq_codel btusb uinput iTCO_wdt ctr btrtl intel_pmc_bxt i915 intel_rapl_msr mei_hdcp mei_pxp joydev at24 watchdog btintel atkbd libps2 serio radeon btbcm vivaldi_fmap btmtk intel_rapl_common snd_hda_codec_hdmi bluetooth uvcvideo nls_iso8859_1 applesmc nls_cp437 x86_pkg_temp_thermal snd_hda_intel intel_powerclamp vfat videobuf2_vmalloc coretemp fat snd_intel_dspcfg crc32_pclmul uvc polyval_clmulni snd_intel_sdw_acpi loop videobuf2_memops snd_hda_codec tun drm_suballoc_helper polyval_generic drm_ttm_helper drm_buddy tap ecdh_generic videobuf2_v4l2 gf128mul macvlan ttm ghash_clmulni_intel ecc tg3 [ +0.000044] videodev bridge snd_hda_core rapl crc16 drm_display_helper cec mousedev snd_hwdep evdev intel_cstate bcm5974 hid_appleir videobuf2_common stp mac_hid libphy snd_pcm drm_kms_helper acpi_als mei_me intel_uncore llc mc snd_timer intel_gtt industrialio_triggered_buffer apple_mfi_fastcharge i2c_i801 mei snd lpc_ich agpgart ptp i2c_smbus thunderbolt apple_gmux i2c_algo_bit kfifo_buf video industrialio soundcore pps_core wmi tiny_power_button sbs sbshc button ac cordic bcma mac80211 cfg80211 ssb rfkill libarc4 kvm_intel kvm drm irqbypass fuse backlight firmware_class efi_pstore configfs efivarfs dmi_sysfs ip_tables x_tables autofs4 dm_crypt cbc encrypted_keys trusted asn1_encoder tee tpm rng_core input_leds hid_apple led_class hid_generic usbhid hid sd_mod t10_pi crc64_rocksoft crc64 crc_t10dif crct10dif_generic ahci libahci libata uhci_hcd ehci_pci ehci_hcd crct10dif_pclmul crct10dif_common sha512_ssse3 sha512_generic sha256_ssse3 sha1_ssse3 aesni_intel usbcore scsi_mod libaes crypto_simd cryptd scsi_common [ +0.000055] usb_common rtc_cmos btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq dm_snapshot dm_bufio dm_mod dax [last unloaded: b43(O)] [ +0.000009] CPU: 7 PID: 25513 Comm: irq/17-b43 Tainted: G W O 6.6.7 #1-NixOS [ +0.000003] Hardware name: Apple Inc. MacBookPro8,3/Mac-942459F5819B171B, BIOS 87.0.0.0.0 06/13/2019 [ +0.000001] RIP: 0010:__ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000046] Code: 00 45 85 e4 0f 85 9b 00 00 00 48 8d bd 40 09 00 00 f0 48 0f ba ad 48 09 00 00 00 72 0f 5b 5d 41 5c 41 5d 41 5e e9 cb 6d 3c d0 <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc 48 8d b4 16 94 00 00 [ +0.000002] RSP: 0018:ffffc90003c77d60 EFLAGS: 00010097 [ +0.000001] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 0000000000000000 [ +0.000001] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff88820b924900 [ +0.000002] RBP: ffff88820b924900 R08: ffffc90003c77d90 R09: 000000000003bfd0 [ +0.000001] R10: ffff88820b924900 R11: ffffc90003c77c68 R12: 0000000000000000 [ +0.000001] R13: 0000000000000000 R14: ffffc90003c77d90 R15: ffffffffc0fa6f40 [ +0.000001] FS: 0000000000000000(0000) GS:ffff88846fb80000(0000) knlGS:0000000000000000 [ +0.000001] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007fafda7ae008 CR3: 000000046d220005 CR4: 00000000000606e0 [ +0.000002] Call Trace: [ +0.000003] [ +0.000001] ? __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000044] ? __warn+0x81/0x130 [ +0.000005] ? __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000045] ? report_bug+0x171/0x1a0 [ +0.000004] ? handle_bug+0x41/0x70 [ +0.000004] ? exc_invalid_op+0x17/0x70 [ +0.000003] ? asm_exc_invalid_op+0x1a/0x20 [ +0.000005] ? __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000043] ieee80211_wake_queue+0x4a/0x80 [mac80211] [ +0.000044] b43_dma_handle_txstatus+0x29c/0x3a0 [b43] [ +0.000016] ? __pfx_irq_thread_fn+0x10/0x10 [ +0.000002] b43_handle_txstatus+0x61/0x80 [b43] [ +0.000012] b43_interrupt_thread_handler+0x3f9/0x6b0 [b43] [ +0.000011] irq_thread_fn+0x23/0x60 [ +0.000002] irq_thread+0xfe/0x1c0 [ +0.000002] ? __pfx_irq_thread_dtor+0x10/0x10 [ +0.000001] ? __pfx_irq_thread+0x10/0x10 [ +0.000001] kthread+0xe8/0x120 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000003] ret_from_fork+0x34/0x50 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000002] ret_from_fork_asm+0x1b/0x30 [ +0.000004] [ +0.000001] ---[ end trace 0000000000000000 ]--- [ +0.000065] ------------[ cut here ]------------ [ +0.000001] WARNING: CPU: 0 PID: 56077 at net/mac80211/util.c:514 __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000077] Modules linked in: b43(O) snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_chain_nat xt_MASQUERADE nf_nat xfrm_user xfrm_algo xt_addrtype overlay ccm af_packet amdgpu snd_hda_codec_cirrus snd_hda_codec_generic ledtrig_audio drm_exec amdxcp gpu_sched xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6t_rpfilter ipt_rpfilter xt_pkttype xt_LOG nf_log_syslog xt_tcpudp nft_compat nf_tables nfnetlink sch_fq_codel btusb uinput iTCO_wdt ctr btrtl intel_pmc_bxt i915 intel_rapl_msr mei_hdcp mei_pxp joydev at24 watchdog btintel atkbd libps2 serio radeon btbcm vivaldi_fmap btmtk intel_rapl_common snd_hda_codec_hdmi bluetooth uvcvideo nls_iso8859_1 applesmc nls_cp437 x86_pkg_temp_thermal snd_hda_intel intel_powerclamp vfat videobuf2_vmalloc coretemp fat snd_intel_dspcfg crc32_pclmul uvc polyval_clmulni snd_intel_sdw_acpi loop videobuf2_memops snd_hda_codec tun drm_suballoc_helper polyval_generic drm_ttm_helper drm_buddy tap ecdh_generic videobuf2_v4l2 gf128mul macvlan ttm ghash_clmulni_intel ecc tg3 [ +0.000073] videodev bridge snd_hda_core rapl crc16 drm_display_helper cec mousedev snd_hwdep evdev intel_cstate bcm5974 hid_appleir videobuf2_common stp mac_hid libphy snd_pcm drm_kms_helper acpi_als mei_me intel_uncore llc mc snd_timer intel_gtt industrialio_triggered_buffer apple_mfi_fastcharge i2c_i801 mei snd lpc_ich agpgart ptp i2c_smbus thunderbolt apple_gmux i2c_algo_bit kfifo_buf video industrialio soundcore pps_core wmi tiny_power_button sbs sbshc button ac cordic bcma mac80211 cfg80211 ssb rfkill libarc4 kvm_intel kvm drm irqbypass fuse backlight firmware_class efi_pstore configfs efivarfs dmi_sysfs ip_tables x_tables autofs4 dm_crypt cbc encrypted_keys trusted asn1_encoder tee tpm rng_core input_leds hid_apple led_class hid_generic usbhid hid sd_mod t10_pi crc64_rocksoft crc64 crc_t10dif crct10dif_generic ahci libahci libata uhci_hcd ehci_pci ehci_hcd crct10dif_pclmul crct10dif_common sha512_ssse3 sha512_generic sha256_ssse3 sha1_ssse3 aesni_intel usbcore scsi_mod libaes crypto_simd cryptd scsi_common [ +0.000084] usb_common rtc_cmos btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq dm_snapshot dm_bufio dm_mod dax [last unloaded: b43] [ +0.000012] CPU: 0 PID: 56077 Comm: kworker/u16:17 Tainted: G W O 6.6.7 #1-NixOS [ +0.000003] Hardware name: Apple Inc. MacBookPro8,3/Mac-942459F5819B171B, BIOS 87.0.0.0.0 06/13/2019 [ +0.000001] Workqueue: phy7 b43_tx_work [b43] [ +0.000019] RIP: 0010:__ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000076] Code: 74 11 48 8b 78 08 0f b7 d6 89 e9 4c 89 e6 e8 ab f4 00 00 65 ff 0d 9c b7 34 3f 0f 85 55 ff ff ff 0f 1f 44 00 00 e9 4b ff ff ff <0f> 0b 5b 5d 41 5c 41 5d c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 [ +0.000002] RSP: 0000:ffffc90004157d50 EFLAGS: 00010097 [ +0.000002] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 0000000000000000 [ +0.000002] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff8882d65d0900 [ +0.000002] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 [ +0.000001] R10: 00000000000000ff R11: ffff88814d0155a0 R12: ffff8882d65d0900 [ +0.000002] R13: 0000000000000000 R14: ffff8881002d2800 R15: 00000000000000d0 [ +0.000002] FS: 0000000000000000(0000) GS:ffff88846f800000(0000) knlGS:0000000000000000 [ +0.000003] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000002] CR2: 00007f2e8c10c880 CR3: 0000000385b66005 CR4: 00000000000606f0 [ +0.000002] Call Trace: [ +0.000001] [ +0.000001] ? __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000075] ? __warn+0x81/0x130 [ +0.000004] ? __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000075] ? report_bug+0x171/0x1a0 [ +0.000005] ? handle_bug+0x41/0x70 [ +0.000003] ? exc_invalid_op+0x17/0x70 [ +0.000004] ? asm_exc_invalid_op+0x1a/0x20 [ +0.000004] ? __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000076] ieee80211_stop_queue+0x36/0x50 [mac80211] [ +0.000077] b43_dma_tx+0x550/0x780 [b43] [ +0.000023] b43_tx_work+0x90/0x130 [b43] [ +0.000018] process_one_work+0x174/0x340 [ +0.000003] worker_thread+0x27b/0x3a0 [ +0.000004] ? __pfx_worker_thread+0x10/0x10 [ +0.000002] kthread+0xe8/0x120 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000004] ret_from_fork+0x34/0x50 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000003] ret_from_fork_asm+0x1b/0x30 [ +0.000006] [ +0.000001] ---[ end trace 0000000000000000 ]--- Fixes: e6f5b934fba8 ("b43: Add QOS support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-2-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/b43.h | 16 ++++++++++++++++ drivers/net/wireless/broadcom/b43/dma.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/b43.h b/drivers/net/wireless/broadcom/b43/b43.h index 67b4bac048e58..c0d8fc0b22fb2 100644 --- a/drivers/net/wireless/broadcom/b43/b43.h +++ b/drivers/net/wireless/broadcom/b43/b43.h @@ -1082,6 +1082,22 @@ static inline bool b43_using_pio_transfers(struct b43_wldev *dev) return dev->__using_pio_transfers; } +static inline void b43_wake_queue(struct b43_wldev *dev, int queue_prio) +{ + if (dev->qos_enabled) + ieee80211_wake_queue(dev->wl->hw, queue_prio); + else + ieee80211_wake_queue(dev->wl->hw, 0); +} + +static inline void b43_stop_queue(struct b43_wldev *dev, int queue_prio) +{ + if (dev->qos_enabled) + ieee80211_stop_queue(dev->wl->hw, queue_prio); + else + ieee80211_stop_queue(dev->wl->hw, 0); +} + /* Message printing */ __printf(2, 3) void b43info(struct b43_wl *wl, const char *fmt, ...); __printf(2, 3) void b43err(struct b43_wl *wl, const char *fmt, ...); diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 9a7c62bd5e431..cfaf2f9d67b22 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1399,7 +1399,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb) should_inject_overflow(ring)) { /* This TX ring is full. */ unsigned int skb_mapping = skb_get_queue_mapping(skb); - ieee80211_stop_queue(dev->wl->hw, skb_mapping); + b43_stop_queue(dev, skb_mapping); dev->wl->tx_queue_stopped[skb_mapping] = true; ring->stopped = true; if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { @@ -1570,7 +1570,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, } else { /* If the driver queue is running wake the corresponding * mac80211 queue. */ - ieee80211_wake_queue(dev->wl->hw, ring->queue_prio); + b43_wake_queue(dev, ring->queue_prio); if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { b43dbg(dev->wl, "Woke up TX ring %d\n", ring->index); } -- GitLab From b6b6bdfca8bc0525e4b390f2a4aa8ac671efd9d3 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:45 +0000 Subject: [PATCH 0760/1418] wifi: b43: Stop/wake correct queue in PIO Tx path when QoS is disabled [ Upstream commit 77135a38f6c2f950d2306ac3d37cbb407e6243f2 ] When QoS is disabled, the queue priority value will not map to the correct ieee80211 queue since there is only one queue. Stop/wake queue 0 when QoS is disabled to prevent trying to stop/wake a non-existent queue and failing to stop/wake the actual queue instantiated. Fixes: 5100d5ac81b9 ("b43: Add PIO support for PCMCIA devices") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-3-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/pio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/pio.c b/drivers/net/wireless/broadcom/b43/pio.c index 8c28a9250cd19..cc19b589fa70d 100644 --- a/drivers/net/wireless/broadcom/b43/pio.c +++ b/drivers/net/wireless/broadcom/b43/pio.c @@ -525,7 +525,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb) if (total_len > (q->buffer_size - q->buffer_used)) { /* Not enough memory on the queue. */ err = -EBUSY; - ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); + b43_stop_queue(dev, skb_get_queue_mapping(skb)); q->stopped = true; goto out; } @@ -552,7 +552,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb) if (((q->buffer_size - q->buffer_used) < roundup(2 + 2 + 6, 4)) || (q->free_packet_slots == 0)) { /* The queue is full. */ - ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); + b43_stop_queue(dev, skb_get_queue_mapping(skb)); q->stopped = true; } @@ -587,7 +587,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev, list_add(&pack->list, &q->packets_list); if (q->stopped) { - ieee80211_wake_queue(dev->wl->hw, q->queue_prio); + b43_wake_queue(dev, q->queue_prio); q->stopped = false; } } -- GitLab From 3033583a786ef2ada0e351d0940adef1193fba17 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:51 +0000 Subject: [PATCH 0761/1418] wifi: b43: Stop correct queue in DMA worker when QoS is disabled [ Upstream commit 581c8967d66c4961076dbbee356834e9c6777184 ] When QoS is disabled, the queue priority value will not map to the correct ieee80211 queue since there is only one queue. Stop queue 0 when QoS is disabled to prevent trying to stop a non-existent queue and failing to stop the actual queue instantiated. Fixes: bad691946966 ("b43: avoid packet losses in the dma worker code.") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-4-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index b2539a916fd04..c75f294ca1bc9 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -3603,7 +3603,7 @@ static void b43_tx_work(struct work_struct *work) err = b43_dma_tx(dev, skb); if (err == -ENOSPC) { wl->tx_queue_stopped[queue_num] = true; - ieee80211_stop_queue(wl->hw, queue_num); + b43_stop_queue(dev, queue_num); skb_queue_head(&wl->tx_queue[queue_num], skb); break; } @@ -3627,6 +3627,7 @@ static void b43_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb) { struct b43_wl *wl = hw_to_b43_wl(hw); + u16 skb_queue_mapping; if (unlikely(skb->len < 2 + 2 + 6)) { /* Too short, this can't be a valid frame. */ @@ -3635,12 +3636,12 @@ static void b43_op_tx(struct ieee80211_hw *hw, } B43_WARN_ON(skb_shinfo(skb)->nr_frags); - skb_queue_tail(&wl->tx_queue[skb->queue_mapping], skb); - if (!wl->tx_queue_stopped[skb->queue_mapping]) { + skb_queue_mapping = skb_get_queue_mapping(skb); + skb_queue_tail(&wl->tx_queue[skb_queue_mapping], skb); + if (!wl->tx_queue_stopped[skb_queue_mapping]) ieee80211_queue_work(wl->hw, &wl->tx_work); - } else { - ieee80211_stop_queue(wl->hw, skb->queue_mapping); - } + else + b43_stop_queue(wl->current_dev, skb_queue_mapping); } static void b43_qos_params_upload(struct b43_wldev *dev, -- GitLab From b8dd353a1a914721558d42d5bfaf5d6a30adafa1 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:58 +0000 Subject: [PATCH 0762/1418] wifi: b43: Disable QoS for bcm4331 [ Upstream commit 09795bded2e725443fe4a4803cae2079cdaf7b26 ] bcm4331 seems to not function correctly with QoS support. This may be due to issues with currently available firmware or potentially a device specific issue. When queues that are not of the default "best effort" priority are selected, traffic appears to not transmit out of the hardware while no errors are returned. This behavior is present among all the other priority queues: video, voice, and background. While this can be worked around by setting a kernel parameter, the default behavior is problematic for most users and may be difficult to debug. This patch offers a working out-of-box experience for bcm4331 users. Log of the issue (using ssh low-priority traffic as an example): ssh -T -vvvv git@github.com OpenSSH_9.6p1, OpenSSL 3.0.12 24 Oct 2023 debug1: Reading configuration data /etc/ssh/ssh_config debug2: checking match for 'host * exec "/nix/store/q1c2flcykgr4wwg5a6h450hxbk4ch589-bash-5.2-p15/bin/bash -c '/nix/store/c015armnkhr6v18za0rypm7sh1i8js8w-gnupg-2.4.1/bin/gpg-connect-agent --quiet updatestartuptty /bye >/dev/null 2>&1'"' host github.com originally github.com debug3: /etc/ssh/ssh_config line 5: matched 'host "github.com"' debug1: Executing command: '/nix/store/q1c2flcykgr4wwg5a6h450hxbk4ch589-bash-5.2-p15/bin/bash -c '/nix/store/c015armnkhr6v18za0rypm7sh1i8js8w-gnupg-2.4.1/bin/gpg-connect-agent --quiet updatestartuptty /bye >/dev/null 2>&1'' debug3: command returned status 0 debug3: /etc/ssh/ssh_config line 5: matched 'exec "/nix/store/q1c2flcykgr4wwg5a6h450hxbk4ch589-bash-5.2-p15/bin/bash -c '/nix/store/c015armnkhr6v18za0r"' debug2: match found debug1: /etc/ssh/ssh_config line 9: Applying options for * debug3: expanded UserKnownHostsFile '~/.ssh/known_hosts' -> '/home/binary-eater/.ssh/known_hosts' debug3: expanded UserKnownHostsFile '~/.ssh/known_hosts2' -> '/home/binary-eater/.ssh/known_hosts2' debug2: resolving "github.com" port 22 debug3: resolve_host: lookup github.com:22 debug3: channel_clear_timeouts: clearing debug3: ssh_connect_direct: entering debug1: Connecting to github.com [192.30.255.113] port 22. debug3: set_sock_tos: set socket 3 IP_TOS 0x48 Fixes: e6f5b934fba8 ("b43: Add QOS support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-5-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index c75f294ca1bc9..bdfa68cc7ee2a 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -2587,7 +2587,8 @@ static void b43_request_firmware(struct work_struct *work) start_ieee80211: wl->hw->queues = B43_QOS_QUEUE_NUM; - if (!modparam_qos || dev->fw.opensource) + if (!modparam_qos || dev->fw.opensource || + dev->dev->chip_id == BCMA_CHIP_ID_BCM4331) wl->hw->queues = 1; err = ieee80211_register_hw(wl->hw); -- GitLab From d8766257c2e96df971e6a7c3870f2229dd75a40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Fri, 5 Jan 2024 08:57:32 +0100 Subject: [PATCH 0763/1418] wifi: wilc1000: fix declarations ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 535733e90e5d8912ebeccebb05b354a2d06ff459 ] Reorder parameters declaration in wilc_parse_join_bss_param to enforce reverse christmas tree Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240105075733.36331-2-alexis.lothore@bootlin.com Stable-dep-of: 205c50306acf ("wifi: wilc1000: fix RCU usage in connect path") Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/hif.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index a1b75feec6edf..00ecf14afab01 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -374,13 +374,13 @@ out: void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct cfg80211_crypto_settings *crypto) { - struct wilc_join_bss_param *param; - struct ieee80211_p2p_noa_attr noa_attr; - u8 rates_len = 0; + const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); const u8 *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; const u8 *ht_ie, *wpa_ie, *wmm_ie, *rsn_ie; + struct ieee80211_p2p_noa_attr noa_attr; + struct wilc_join_bss_param *param; + u8 rates_len = 0; int ret; - const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) -- GitLab From 745003b5917b610352f52fe0d11ef658d6471ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Fri, 5 Jan 2024 08:57:33 +0100 Subject: [PATCH 0764/1418] wifi: wilc1000: fix RCU usage in connect path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 205c50306acf58a335eb19fa84e40140f4fe814f ] With lockdep enabled, calls to the connect function from cfg802.11 layer lead to the following warning: ============================= WARNING: suspicious RCU usage 6.7.0-rc1-wt+ #333 Not tainted ----------------------------- drivers/net/wireless/microchip/wilc1000/hif.c:386 suspicious rcu_dereference_check() usage! [...] stack backtrace: CPU: 0 PID: 100 Comm: wpa_supplicant Not tainted 6.7.0-rc1-wt+ #333 Hardware name: Atmel SAMA5 unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x34/0x48 dump_stack_lvl from wilc_parse_join_bss_param+0x7dc/0x7f4 wilc_parse_join_bss_param from connect+0x2c4/0x648 connect from cfg80211_connect+0x30c/0xb74 cfg80211_connect from nl80211_connect+0x860/0xa94 nl80211_connect from genl_rcv_msg+0x3fc/0x59c genl_rcv_msg from netlink_rcv_skb+0xd0/0x1f8 netlink_rcv_skb from genl_rcv+0x2c/0x3c genl_rcv from netlink_unicast+0x3b0/0x550 netlink_unicast from netlink_sendmsg+0x368/0x688 netlink_sendmsg from ____sys_sendmsg+0x190/0x430 ____sys_sendmsg from ___sys_sendmsg+0x110/0x158 ___sys_sendmsg from sys_sendmsg+0xe8/0x150 sys_sendmsg from ret_fast_syscall+0x0/0x1c This warning is emitted because in the connect path, when trying to parse target BSS parameters, we dereference a RCU pointer whithout being in RCU critical section. Fix RCU dereference usage by moving it to a RCU read critical section. To avoid wrapping the whole wilc_parse_join_bss_param under the critical section, just use the critical section to copy ies data Fixes: c460495ee072 ("staging: wilc1000: fix incorrent type in initializer") Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240105075733.36331-3-alexis.lothore@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/hif.c | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index 00ecf14afab01..5eb02902e875a 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -374,38 +374,49 @@ out: void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct cfg80211_crypto_settings *crypto) { - const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); - const u8 *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; + const u8 *ies_data, *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; const u8 *ht_ie, *wpa_ie, *wmm_ie, *rsn_ie; struct ieee80211_p2p_noa_attr noa_attr; + const struct cfg80211_bss_ies *ies; struct wilc_join_bss_param *param; - u8 rates_len = 0; + u8 rates_len = 0, ies_len; int ret; param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) return NULL; + rcu_read_lock(); + ies = rcu_dereference(bss->ies); + ies_data = kmemdup(ies->data, ies->len, GFP_ATOMIC); + if (!ies_data) { + rcu_read_unlock(); + kfree(param); + return NULL; + } + ies_len = ies->len; + rcu_read_unlock(); + param->beacon_period = cpu_to_le16(bss->beacon_interval); param->cap_info = cpu_to_le16(bss->capability); param->bss_type = WILC_FW_BSS_TYPE_INFRA; param->ch = ieee80211_frequency_to_channel(bss->channel->center_freq); ether_addr_copy(param->bssid, bss->bssid); - ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies_data, ies_len); if (ssid_elm) { if (ssid_elm[1] <= IEEE80211_MAX_SSID_LEN) memcpy(param->ssid, ssid_elm + 2, ssid_elm[1]); } - tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len); + tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies_data, ies_len); if (tim_elm && tim_elm[1] >= 2) param->dtim_period = tim_elm[3]; memset(param->p_suites, 0xFF, 3); memset(param->akm_suites, 0xFF, 3); - rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies->data, ies->len); + rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies_data, ies_len); if (rates_ie) { rates_len = rates_ie[1]; if (rates_len > WILC_MAX_RATES_SUPPORTED) @@ -416,7 +427,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, if (rates_len < WILC_MAX_RATES_SUPPORTED) { supp_rates_ie = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, - ies->data, ies->len); + ies_data, ies_len); if (supp_rates_ie) { u8 ext_rates = supp_rates_ie[1]; @@ -431,11 +442,11 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, } } - ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies->data, ies->len); + ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies_data, ies_len); if (ht_ie) param->ht_capable = true; - ret = cfg80211_get_p2p_attr(ies->data, ies->len, + ret = cfg80211_get_p2p_attr(ies_data, ies_len, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, (u8 *)&noa_attr, sizeof(noa_attr)); if (ret > 0) { @@ -459,7 +470,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, } wmm_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WMM, - ies->data, ies->len); + ies_data, ies_len); if (wmm_ie) { struct ieee80211_wmm_param_ie *ie; @@ -474,13 +485,13 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, wpa_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WPA, - ies->data, ies->len); + ies_data, ies_len); if (wpa_ie) { param->mode_802_11i = 1; param->rsn_found = true; } - rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); + rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies_data, ies_len); if (rsn_ie) { int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; @@ -514,6 +525,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, param->akm_suites[i] = crypto->akm_suites[i] & 0xFF; } + kfree(ies_data); return (void *)param; } -- GitLab From 3518cea837de4d106efa84ddac18a07b6de1384e Mon Sep 17 00:00:00 2001 From: Martin Kaistra Date: Thu, 11 Jan 2024 17:36:27 +0100 Subject: [PATCH 0765/1418] wifi: rtl8xxxu: add cancel_work_sync() for c2hcmd_work [ Upstream commit 1213acb478a7181cd73eeaf00db430f1e45b1361 ] The workqueue might still be running, when the driver is stopped. To avoid a use-after-free, call cancel_work_sync() in rtl8xxxu_stop(). Fixes: e542e66b7c2e ("rtl8xxxu: add bluetooth co-existence support for single antenna") Signed-off-by: Martin Kaistra Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/20240111163628.320697-2-martin.kaistra@linutronix.de Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 6dd5ec1e4d8c3..ccac47dd781d6 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6542,6 +6542,7 @@ static void rtl8xxxu_stop(struct ieee80211_hw *hw) if (priv->usb_interrupts) rtl8xxxu_write32(priv, REG_USB_HIMR, 0); + cancel_work_sync(&priv->c2hcmd_work); cancel_delayed_work_sync(&priv->ra_watchdog); rtl8xxxu_free_rx_resources(priv); -- GitLab From 515cc676dfbce40d93c92b1ff3c1070e917f4e52 Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Mon, 15 Jan 2024 15:56:32 +0100 Subject: [PATCH 0766/1418] wifi: wilc1000: do not realloc workqueue everytime an interface is added MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 328efda22af81130c2ad981c110518cb29ff2f1d ] Commit 09ed8bfc5215 ("wilc1000: Rename workqueue from "WILC_wq" to "NETDEV-wq"") moved workqueue creation in wilc_netdev_ifc_init in order to set the interface name in the workqueue name. However, while the driver needs only one workqueue, the wilc_netdev_ifc_init is called each time we add an interface over a phy, which in turns overwrite the workqueue with a new one. This can be observed with the following commands: for i in $(seq 0 10) do iw phy phy0 interface add wlan1 type managed iw dev wlan1 del done ps -eo pid,comm|grep wlan 39 kworker/R-wlan0 98 kworker/R-wlan1 102 kworker/R-wlan1 105 kworker/R-wlan1 108 kworker/R-wlan1 111 kworker/R-wlan1 114 kworker/R-wlan1 117 kworker/R-wlan1 120 kworker/R-wlan1 123 kworker/R-wlan1 126 kworker/R-wlan1 129 kworker/R-wlan1 Fix this leakage by putting back hif_workqueue allocation in wilc_cfg80211_init. Regarding the workqueue name, it is indeed relevant to set it lowercase, however it is not attached to a specific netdev, so enforcing netdev name in the name is not so relevant. Still, enrich the name with the wiphy name to make it clear which phy is using the workqueue. Fixes: 09ed8bfc5215 ("wilc1000: Rename workqueue from "WILC_wq" to "NETDEV-wq"") Signed-off-by: Ajay Singh Co-developed-by: Alexis Lothoré Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240115-wilc_1000_fixes-v1-3-54d29463a738@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 11 ++++++++++- drivers/net/wireless/microchip/wilc1000/netdev.c | 10 +--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b545d93c6e374..2f75dc4b47975 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1810,15 +1810,24 @@ int wilc_cfg80211_init(struct wilc **wilc, struct device *dev, int io_type, INIT_LIST_HEAD(&wl->rxq_head.list); INIT_LIST_HEAD(&wl->vif_list); + wl->hif_workqueue = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, + wiphy_name(wl->wiphy)); + if (!wl->hif_workqueue) { + ret = -ENOMEM; + goto free_cfg; + } vif = wilc_netdev_ifc_init(wl, "wlan%d", WILC_STATION_MODE, NL80211_IFTYPE_STATION, false); if (IS_ERR(vif)) { ret = PTR_ERR(vif); - goto free_cfg; + goto free_hq; } return 0; +free_hq: + destroy_workqueue(wl->hif_workqueue); + free_cfg: wilc_wlan_cfg_deinit(wl); diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index e9f59de31b0b9..0e6eeeed2e086 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -977,13 +977,6 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, goto error; } - wl->hif_workqueue = alloc_ordered_workqueue("%s-wq", WQ_MEM_RECLAIM, - ndev->name); - if (!wl->hif_workqueue) { - ret = -ENOMEM; - goto unregister_netdev; - } - ndev->needs_free_netdev = true; vif->iftype = vif_type; vif->idx = wilc_get_available_idx(wl); @@ -996,12 +989,11 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, return vif; -unregister_netdev: +error: if (rtnl_locked) cfg80211_unregister_netdevice(ndev); else unregister_netdev(ndev); - error: free_netdev(ndev); return ERR_PTR(ret); } -- GitLab From 5a26e6d2a7ec0c672d21b9c4a1c914c1b70d6d0f Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Mon, 15 Jan 2024 15:56:34 +0100 Subject: [PATCH 0767/1418] wifi: wilc1000: fix multi-vif management when deleting a vif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 12cfc9c8d3faf887a202c89bc312202445fca7e8 ] Adding then removing a second vif currently makes the first vif not working anymore. This is visible for example when we have a first interface connected to some access point: - create a wpa_supplicant.conf with some AP credentials - wpa_supplicant -Dnl80211 -c /etc/wpa_supplicant.conf -i wlan0 - dhclient wlan0 - iw phy phy0 interface add wlan1 type managed - iw dev wlan1 del wlan0 does not manage properly traffic anymore (eg: ping not working) This is due to vif mode being incorrectly reconfigured with some default values in del_virtual_intf, affecting by default first vif. Prevent first vif from being affected on second vif removal by removing vif mode change command in del_virtual_intf Fixes: 9bc061e88054 ("staging: wilc1000: added support to dynamically add/remove interfaces") Signed-off-by: Ajay Singh Co-developed-by: Alexis Lothoré Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240115-wilc_1000_fixes-v1-5-54d29463a738@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 2f75dc4b47975..6f3245a43aef1 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1615,7 +1615,6 @@ static int del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) cfg80211_unregister_netdevice(vif->ndev); vif->monitor_flag = 0; - wilc_set_operation_mode(vif, 0, 0, 0); mutex_lock(&wl->vif_mutex); list_del_rcu(&vif->list); wl->vif_num--; -- GitLab From 6798cf0aaa7c73cee7fd3d13fc7110617080a0a6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Sun, 3 Sep 2023 11:02:15 +0800 Subject: [PATCH 0768/1418] wifi: mwifiex: debugfs: Drop unnecessary error check for debugfs_create_dir() [ Upstream commit 50180c7f8e3de7c2d87f619131776598fcb1478d ] debugfs_create_dir() returns ERR_PTR and never return NULL. As Russell suggested, this patch removes the error checking for debugfs_create_dir(). This is because the DebugFS kernel API is developed in a way that the caller can safely ignore the errors that occur during the creation of DebugFS nodes. The debugfs APIs have a IS_ERR() judge in start_creating() which can handle it gracefully. So these checks are unnecessary. Fixes: 5e6e3a92b9a4 ("wireless: mwifiex: initial commit for Marvell mwifiex driver") Signed-off-by: Jinjie Ruan Suggested-by: Russell King (Oracle) Signed-off-by: Kalle Valo Link: https://msgid.link/20230903030216.1509013-3-ruanjinjie@huawei.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/debugfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index 63f232c723374..55ca5b287fe7f 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -964,9 +964,6 @@ mwifiex_dev_debugfs_init(struct mwifiex_private *priv) priv->dfs_dev_dir = debugfs_create_dir(priv->netdev->name, mwifiex_dfs_dir); - if (!priv->dfs_dev_dir) - return; - MWIFIEX_DFS_ADD_FILE(info); MWIFIEX_DFS_ADD_FILE(debug); MWIFIEX_DFS_ADD_FILE(getlog); -- GitLab From a81edfc5ec8b0b07e790265e4a9aee2135a2c0bf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 15 Jan 2024 12:03:03 +0100 Subject: [PATCH 0769/1418] ARM: dts: renesas: r8a73a4: Fix external clocks and clock rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 090c4094574705b0afc7d37825cdc5d06f0e7e02 ] External clocks should be defined as zero-Hz clocks in the SoC .dtsi, and overridden in the board .dts when present. Correct the clock rate of extal1 from 25 to 26 MHz, to match the crystal oscillator present on the APE6-EVM board. Fixes: a76809a329d6ebae ("ARM: shmobile: r8a73a4: Common clock framework DT description") Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Link: https://lore.kernel.org/r/1692bc8cd465d62168cbf110522ad62a7af3f606.1705315614.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm/boot/dts/r8a73a4-ape6evm.dts | 12 ++++++++++++ arch/arm/boot/dts/r8a73a4.dtsi | 9 ++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/r8a73a4-ape6evm.dts index e81a7213d3047..4282bafbb5043 100644 --- a/arch/arm/boot/dts/r8a73a4-ape6evm.dts +++ b/arch/arm/boot/dts/r8a73a4-ape6evm.dts @@ -209,6 +209,18 @@ status = "okay"; }; +&extal1_clk { + clock-frequency = <26000000>; +}; + +&extal2_clk { + clock-frequency = <48000000>; +}; + +&extalr_clk { + clock-frequency = <32768>; +}; + &pfc { scifa0_pins: scifa0 { groups = "scifa0_data"; diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index c39066967053f..d1f4cbd099efb 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -450,17 +450,20 @@ extalr_clk: extalr { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <32768>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; extal1_clk: extal1 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <25000000>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; extal2_clk: extal2 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <48000000>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; fsiack_clk: fsiack { compatible = "fixed-clock"; -- GitLab From e72160cb6e23b78b41999d6885a34ce8db536095 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Wed, 17 Jan 2024 10:12:20 +0300 Subject: [PATCH 0770/1418] cpufreq: brcmstb-avs-cpufreq: add check for cpufreq_cpu_get's return value [ Upstream commit f661017e6d326ee187db24194cabb013d81bc2a6 ] cpufreq_cpu_get may return NULL. To avoid NULL-dereference check it and return 0 in case of error. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Signed-off-by: Anastasia Belova Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index f644c5e325fb2..38ec0fedb247f 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,6 +481,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (!policy) + return 0; struct private_data *priv = policy->driver_data; cpufreq_cpu_put(policy); -- GitLab From b98ed6417e5958b8bae37798a0e0663e5b4317d2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:44:13 -0600 Subject: [PATCH 0771/1418] cpufreq: Explicitly include correct DT includes [ Upstream commit a70eb93a2477371638ef481aaae7bb7b760d3004 ] The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it as merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar Stable-dep-of: 788715b5f21c ("cpufreq: mediatek-hw: Wait for CPU supplies before probing") Signed-off-by: Sasha Levin --- drivers/cpufreq/armada-37xx-cpufreq.c | 4 +--- drivers/cpufreq/mediatek-cpufreq-hw.c | 3 ++- drivers/cpufreq/ppc_cbe_cpufreq.c | 2 +- drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 1 - drivers/cpufreq/qcom-cpufreq-nvmem.c | 1 - drivers/cpufreq/scpi-cpufreq.c | 2 +- drivers/cpufreq/sti-cpufreq.c | 2 +- drivers/cpufreq/ti-cpufreq.c | 2 +- drivers/cpufreq/vexpress-spc-cpufreq.c | 1 - 9 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index b74289a95a171..bea41ccabf1f0 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -14,10 +14,8 @@ #include #include #include +#include #include -#include -#include -#include #include #include #include diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f0e0a35c7f217..212bbca8daf32 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -10,8 +10,9 @@ #include #include #include -#include +#include #include +#include #include #define LUT_MAX_ENTRIES 32U diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index e3313ce63b388..88afc49941b71 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 4fba3637b115c..6f0c32592416d 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index a577586b23be2..cb03bfb0435ea 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index fd2c16821d54c..ac719aca49b75 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 1a63aeea87112..9c542e723a157 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index f64180dd2005b..61ef653bcf56f 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index d295f405c4bb0..865e501648034 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include -- GitLab From 60b5b89e41a68bccad14ea7cecc8fe494ce48830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 10 Jan 2024 11:23:02 -0300 Subject: [PATCH 0772/1418] cpufreq: mediatek-hw: Wait for CPU supplies before probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 788715b5f21c6455264fe00a1779e61bec407fe2 ] Before proceeding with the probe and enabling frequency scaling for the CPUs, make sure that all supplies feeding the CPUs have probed. This fixes an issue observed on MT8195-Tomato where if the mediatek-cpufreq-hw driver enabled the hardware (by writing to REG_FREQ_ENABLE) before the SPMI controller driver (spmi-mtk-pmif), behind which lies the big CPU supply, probed the platform would hang shortly after with "rcu: INFO: rcu_preempt detected stalls on CPUs/tasks" being printed in the log. Fixes: 4855e26bcf4d ("cpufreq: mediatek-hw: Add support for CPUFREQ HW") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/mediatek-cpufreq-hw.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 212bbca8daf32..42240a7d826da 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #define LUT_MAX_ENTRIES 32U @@ -296,7 +297,23 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) { const void *data; - int ret; + int ret, cpu; + struct device *cpu_dev; + struct regulator *cpu_reg; + + /* Make sure that all CPU supplies are available before proceeding. */ + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "Failed to get cpu%d device\n", cpu); + + cpu_reg = devm_regulator_get_optional(cpu_dev, "cpu"); + if (IS_ERR(cpu_reg)) + return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), + "CPU%d regulator get failed\n", cpu); + } + data = of_device_get_match_data(&pdev->dev); if (!data) -- GitLab From f7dbf2c1675ae53c8f9b1ff77741daa08060873b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:55 +0000 Subject: [PATCH 0773/1418] sock_diag: annotate data-races around sock_diag_handlers[family] [ Upstream commit efd402537673f9951992aea4ef0f5ff51d858f4b ] __sock_diag_cmd() and sock_diag_bind() read sock_diag_handlers[family] without a lock held. Use READ_ONCE()/WRITE_ONCE() annotations to avoid potential issues. Fixes: 8ef874bfc729 ("sock_diag: Move the sock_ code to net/core/") Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/sock_diag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index f7cf74cdd3db1..e6ea6764d10ab 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -190,7 +190,7 @@ int sock_diag_register(const struct sock_diag_handler *hndl) if (sock_diag_handlers[hndl->family]) err = -EBUSY; else - sock_diag_handlers[hndl->family] = hndl; + WRITE_ONCE(sock_diag_handlers[hndl->family], hndl); mutex_unlock(&sock_diag_table_mutex); return err; @@ -206,7 +206,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) mutex_lock(&sock_diag_table_mutex); BUG_ON(sock_diag_handlers[family] != hnld); - sock_diag_handlers[family] = NULL; + WRITE_ONCE(sock_diag_handlers[family], NULL); mutex_unlock(&sock_diag_table_mutex); } EXPORT_SYMBOL_GPL(sock_diag_unregister); @@ -224,7 +224,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX); - if (sock_diag_handlers[req->sdiag_family] == NULL) + if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL) sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); @@ -283,12 +283,12 @@ static int sock_diag_bind(struct net *net, int group) switch (group) { case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET]) + if (!READ_ONCE(sock_diag_handlers[AF_INET])) sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET6]) + if (!READ_ONCE(sock_diag_handlers[AF_INET6])) sock_load_diag_module(AF_INET6, 0); break; } -- GitLab From d10dbf722d32bbf3e01ec754604b396142ad38a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:56 +0000 Subject: [PATCH 0774/1418] inet_diag: annotate data-races around inet_diag_table[] [ Upstream commit e50e10ae5d81ddb41547114bfdc5edc04422f082 ] inet_diag_lock_handler() reads inet_diag_table[proto] locklessly. Use READ_ONCE()/WRITE_ONCE() annotations to avoid potential issues. Fixes: d523a328fb02 ("[INET]: Fix inet_diag dead-lock regression") Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/inet_diag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f7426926a1041..8f690a6e61baa 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto) return ERR_PTR(-ENOENT); } - if (!inet_diag_table[proto]) + if (!READ_ONCE(inet_diag_table[proto])) sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); @@ -1419,7 +1419,7 @@ int inet_diag_register(const struct inet_diag_handler *h) mutex_lock(&inet_diag_table_mutex); err = -EEXIST; if (!inet_diag_table[type]) { - inet_diag_table[type] = h; + WRITE_ONCE(inet_diag_table[type], h); err = 0; } mutex_unlock(&inet_diag_table_mutex); @@ -1436,7 +1436,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) return; mutex_lock(&inet_diag_table_mutex); - inet_diag_table[type] = NULL; + WRITE_ONCE(inet_diag_table[type], NULL); mutex_unlock(&inet_diag_table_mutex); } EXPORT_SYMBOL_GPL(inet_diag_unregister); -- GitLab From c1760abb10021dc140e064d74e3f49a48b9e5e40 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 16 Jan 2024 14:19:20 +0800 Subject: [PATCH 0775/1418] bpftool: Silence build warning about calloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f5f30386c78105cba520e443a6a9ee945ec1d066 ] There exists the following warning when building bpftool: CC prog.o prog.c: In function ‘profile_open_perf_events’: prog.c:2301:24: warning: ‘calloc’ sizes specified with ‘sizeof’ in the earlier argument and not in the later argument [-Wcalloc-transposed-args] 2301 | sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); | ^~~ prog.c:2301:24: note: earlier argument should specify number of elements, later size of each element Tested with the latest upstream GCC which contains a new warning option -Wcalloc-transposed-args. The first argument to calloc is documented to be number of elements in array, while the second argument is size of each element, just switch the first and second arguments of calloc() to silence the build warning, compile tested only. Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command") Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240116061920.31172-1-yangtiezhu@loongson.cn Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 41c02b6f6f043..7e0b846e17eef 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2200,7 +2200,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj) int map_fd; profile_perf_events = calloc( - sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int)); if (!profile_perf_events) { p_err("failed to allocate memory for perf_event array: %s", strerror(errno)); -- GitLab From bb21851465b3f1b9323217c4517f7aa37aaa4fd1 Mon Sep 17 00:00:00 2001 From: Andrey Grafin Date: Wed, 17 Jan 2024 16:06:18 +0300 Subject: [PATCH 0776/1418] libbpf: Apply map_set_def_max_entries() for inner_maps on creation [ Upstream commit f04deb90e516e8e48bf8693397529bc942a9e80b ] This patch allows to auto create BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS with values of BPF_MAP_TYPE_PERF_EVENT_ARRAY by bpf_object__load(). Previous behaviour created a zero filled btf_map_def for inner maps and tried to use it for a map creation but the linux kernel forbids to create a BPF_MAP_TYPE_PERF_EVENT_ARRAY map with max_entries=0. Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Signed-off-by: Andrey Grafin Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240117130619.9403-1-conquistador@yandex-team.ru Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2014b1250ea2..c71d4d0f5c6f3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -70,6 +70,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); static const char * const attach_type_name[] = { [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", @@ -4992,6 +4993,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", -- GitLab From 6ae1ec0043d5ab85995770c52fb81b21f2be4712 Mon Sep 17 00:00:00 2001 From: Andrey Grafin Date: Wed, 17 Jan 2024 16:06:19 +0300 Subject: [PATCH 0777/1418] selftest/bpf: Add map_in_maps with BPF_MAP_TYPE_PERF_EVENT_ARRAY values [ Upstream commit 40628f9fff73adecac77a9aa390f8016724cad99 ] Check that bpf_object__load() successfully creates map_in_maps with BPF_MAP_TYPE_PERF_EVENT_ARRAY values. These changes cover fix in the previous patch "libbpf: Apply map_set_def_max_entries() for inner_maps on creation". A command line output is: - w/o fix $ sudo ./test_maps libbpf: map 'mim_array_pe': failed to create inner map: -22 libbpf: map 'mim_array_pe': failed to create: Invalid argument(-22) libbpf: failed to load object './test_map_in_map.bpf.o' Failed to load test prog - with fix $ sudo ./test_maps ... test_maps: OK, 0 SKIPPED Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Signed-off-by: Andrey Grafin Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240117130619.9403-2-conquistador@yandex-team.ru Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- .../selftests/bpf/progs/test_map_in_map.c | 26 +++++++++++++++++++ tools/testing/selftests/bpf/test_maps.c | 6 ++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index f416032ba858b..b295f9b721bf8 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -21,6 +21,32 @@ struct { __type(value, __u32); } mim_hash SEC(".maps"); +/* The following three maps are used to test + * perf_event_array map can be an inner + * map of hash/array_of_maps. + */ +struct perf_event_array { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} inner_map0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_array_pe SEC(".maps") = { + .values = {&inner_map0}}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_hash_pe SEC(".maps") = { + .values = {&inner_map0}}; + SEC("xdp") int xdp_mimtest0(struct xdp_md *ctx) { diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index b73152822aa28..81cd48cc80c23 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1190,7 +1190,11 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__load(obj); + err = bpf_object__load(obj); + if (err) { + printf("Failed to load test prog\n"); + goto out_map_in_map; + } map = bpf_object__find_map_by_name(obj, "mim_array"); if (!map) { -- GitLab From 7ab56f24bf01ae3ffaac88fe1f41744f6a25a0b8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:52 -0800 Subject: [PATCH 0778/1418] af_unix: Annotate data-race of gc_in_progress in wait_for_unix_gc(). [ Upstream commit 31e03207119a535d0b0e3b3a7f91983aeb2cb14d ] gc_in_progress is changed under spin_lock(&unix_gc_lock), but wait_for_unix_gc() reads it locklessly. Let's use READ_ONCE(). Fixes: 5f23b734963e ("net: Fix soft lockups/OOM issues w/ unix garbage collector") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240123170856.41348-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/garbage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ab2c83d58b62a..9bfffe2a7f020 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -198,7 +198,7 @@ void wait_for_unix_gc(void) if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && !READ_ONCE(gc_in_progress)) unix_gc(); - wait_event(unix_gc_wait, gc_in_progress == false); + wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress)); } /* The external entry point: unix_gc() */ -- GitLab From 1b3bae282ae892df62e6b2ff7fceed43f4b16ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 24 Jan 2024 17:31:43 -0300 Subject: [PATCH 0779/1418] cpufreq: mediatek-hw: Don't error out if supply is not found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eaffb10b51bf74415c9252fd8fb4dd77122501ee ] devm_regulator_get_optional() returns -ENODEV if no supply can be found. By introducing its usage, commit 788715b5f21c ("cpufreq: mediatek-hw: Wait for CPU supplies before probing") caused the driver to fail probe if no supply was present in any of the CPU DT nodes. Use devm_regulator_get() instead since the CPUs do require supplies even if not described in the DT. It will gracefully return a dummy regulator if none is found in the DT node, allowing probe to succeed. Fixes: 788715b5f21c ("cpufreq: mediatek-hw: Wait for CPU supplies before probing") Reported-by: kernelci.org bot Closes: https://linux.kernelci.org/test/case/id/65b0b169710edea22852a3fa/ Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 42240a7d826da..7f326bb5fd8de 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -308,7 +308,7 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "Failed to get cpu%d device\n", cpu); - cpu_reg = devm_regulator_get_optional(cpu_dev, "cpu"); + cpu_reg = devm_regulator_get(cpu_dev, "cpu"); if (IS_ERR(cpu_reg)) return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), "CPU%d regulator get failed\n", cpu); -- GitLab From 3248f4ae054c787a72905ed42ef736bad0d5140e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 26 Jan 2024 14:09:44 -0800 Subject: [PATCH 0780/1418] libbpf: Fix faccessat() usage on Android [ Upstream commit ad57654053805bf9a62602aaec74cc78edb6f235 ] Android implementation of libc errors out with -EINVAL in faccessat() if passed AT_EACCESS ([0]), this leads to ridiculous issue with libbpf refusing to load /sys/kernel/btf/vmlinux on Androids ([1]). Fix by detecting Android and redefining AT_EACCESS to 0, it's equivalent on Android. [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 Fixes: 6a4ab8869d0b ("libbpf: Fix the case of running as non-root with capabilities") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240126220944.2497665-1-andrii@kernel.org Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf_internal.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 377642ff51fce..8669f6e0f6e2f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -17,6 +17,20 @@ #include #include "relo_core.h" +/* Android's libc doesn't support AT_EACCESS in faccessat() implementation + * ([0]), and just returns -EINVAL even if file exists and is accessible. + * See [1] for issues caused by this. + * + * So just redefine it to 0 on Android. + * + * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 + * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 + */ +#ifdef __ANDROID__ +#undef AT_EACCESS +#define AT_EACCESS 0 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -- GitLab From 4c820998a5a481e8cc99da48068c94d361391612 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 25 Jan 2024 13:05:10 -0800 Subject: [PATCH 0781/1418] pmdomain: qcom: rpmhpd: Drop SA8540P gfx.lvl [ Upstream commit 883957bee580b723fd87d49ac73e0c84fc03a446 ] On SA8295P and SA8540P gfx.lvl is not provdied by rpmh, but rather is handled by an external regulator (max20411). Drop gfx.lvl from the list of power-domains exposed on this platform. Fixes: f68f1cb3437d ("soc: qcom: rpmhpd: add sc8280xp & sa8540p rpmh power-domains") Reviewed-by: Dmitry Baryshkov Acked-by: Ulf Hansson Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240125-sa8295p-gpu-v4-4-7011c2a63037@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/soc/qcom/rpmhpd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 9a90f241bb97f..6efe36aeb48e9 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -195,7 +195,6 @@ static struct rpmhpd *sa8540p_rpmhpds[] = { [SC8280XP_CX] = &cx, [SC8280XP_CX_AO] = &cx_ao, [SC8280XP_EBI] = &ebi, - [SC8280XP_GFX] = &gfx, [SC8280XP_LCX] = &lcx, [SC8280XP_LMX] = &lmx, [SC8280XP_MMCX] = &mmcx, -- GitLab From 8f38b401b4e05e768484b9bde356f46d4bb00247 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:48:58 +0100 Subject: [PATCH 0782/1418] arm64: dts: imx8mm-kontron: Disable pullups for I2C signals on OSM-S i.MX8MM [ Upstream commit 96293af54f6aa859015d8ca40a1437d3115ad50c ] There are external pullup resistors on the board and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts | 4 ++-- arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 8b16bd68576c0..0730c22e5b6b9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -294,8 +294,8 @@ pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3 - MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3 + MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 + MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi index 8d10f5b412978..9643d6ed9a7c7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi @@ -247,8 +247,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3 - MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 + MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083 + MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083 >; }; -- GitLab From 333a02b3be6a3bc42a349c3bea0b6c835226286f Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:48:59 +0100 Subject: [PATCH 0783/1418] arm64: dts: imx8mm-kontron: Disable pullups for I2C signals on SL/BL i.MX8MM [ Upstream commit f19e5bb91d53264d7dac5d845a4825afadf72440 ] There are external pullup resistors on the board and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts | 4 ++-- arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index a079322a37931..8d0527bb6fa59 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -277,8 +277,8 @@ pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3 - MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3 + MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 + MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi index 0679728d24899..884ae2ad35114 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi @@ -237,8 +237,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3 - MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 + MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083 + MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083 >; }; -- GitLab From dde02bf5fc9bb9ac7b112527d49f98aa34f97918 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:00 +0100 Subject: [PATCH 0784/1418] arm64: dts: imx8mm-kontron: Disable pullups for onboard UART signals on BL OSM-S board [ Upstream commit c6d9b5672a0e2c4b1079a50d2fc8780c40cfd3eb ] These signals are actively driven by the SoC or by the onboard transceiver. There's no need to enable the internal pull resistors and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mm-kontron-bl-osm-s.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 0730c22e5b6b9..1dd03ef0a7835 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -313,19 +313,19 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140 - MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 + MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 + MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140 - MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140 - MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 + MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 + MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 >; }; -- GitLab From ed2186ea406fdb2263635ce37b210025aa9d6d77 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:01 +0100 Subject: [PATCH 0785/1418] arm64: dts: imx8mm-kontron: Disable pullups for onboard UART signals on BL board [ Upstream commit 162aadaa0df8217b0cc49d919dd00022fef65e78 ] These signals are actively driven by the SoC or by the onboard transceiver. There's no need to enable the internal pull resistors and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../boot/dts/freescale/imx8mm-kontron-bl.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index 8d0527bb6fa59..bffa0ea4aa46a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -290,19 +290,19 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140 - MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 + MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 + MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140 - MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140 - MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 + MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 + MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 >; }; -- GitLab From a4116bd6ee5e1c1b65a61ed9221657615a2f45bf Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:02 +0100 Subject: [PATCH 0786/1418] arm64: dts: imx8mm-kontron: Disable pull resistors for SD card signals on BL OSM-S board [ Upstream commit 5a940ba3e4d7c8710c9073ff5d0ca4644d4da9db ] Some signals have external pullup resistors on the board and don't need the internal ones to be enabled. Due to silicon errata ERR050080 let's disable the internal pull resistors whererever possible and prevent any unwanted behavior in case they wear out. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mm-kontron-bl-osm-s.dts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 1dd03ef0a7835..d9fa0deea7002 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -337,40 +337,40 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; }; -- GitLab From 795fb93bde7aa1f769e377ec28853eab0dec8181 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:03 +0100 Subject: [PATCH 0787/1418] arm64: dts: imx8mm-kontron: Disable pull resistors for SD card signals on BL board [ Upstream commit 008820524844326ffb3123cebceba1960c0ad0dc ] Some signals have external pullup resistors on the board and don't need the internal ones to be enabled. Due to silicon errata ERR050080 let's disable the internal pull resistors whererever possible and prevent any unwanted behavior in case they wear out. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../boot/dts/freescale/imx8mm-kontron-bl.dts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index bffa0ea4aa46a..d54cddd65b526 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -314,40 +314,40 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; }; -- GitLab From 4f4f1e2deef2e6aaf98e10847a08b7fc25796834 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:04 +0100 Subject: [PATCH 0788/1418] arm64: dts: imx8mm-kontron: Fix interrupt for RTC on OSM-S i.MX8MM module [ Upstream commit 8d0f39b7d04d864e89b84063b124fd10aa4b8809 ] The level of the interrupt signal is active low instead. Fix this. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi index 9643d6ed9a7c7..d5199ecb3f6c1 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi @@ -205,7 +205,7 @@ reg = <0x52>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc>; - interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_HIGH>; + interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_LOW>; trickle-diode-disable; }; }; -- GitLab From fed6a1df672eb0c281b17ba72ef78edcb724ce3a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:24 -0800 Subject: [PATCH 0789/1418] libbpf: Add missing LIBBPF_API annotation to libbpf_set_memlock_rlim API [ Upstream commit 93ee1eb85e28d1e35bb059c1f5965d65d5fc83c2 ] LIBBPF_API annotation seems missing on libbpf_set_memlock_rlim API, so add it to make this API callable from libbpf's shared library version. Fixes: e542f2c4cd16 ("libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF") Fixes: ab9a5a05dc48 ("libbpf: fix up few libbpf.map problems") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-3-andrii@kernel.org Signed-off-by: Sasha Levin --- tools/lib/bpf/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index fddc05c667b5d..874fe362375de 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,7 +35,7 @@ extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); +LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ -- GitLab From 74d0639261dd795dce958d1b14815bdcbb48a715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jan 2024 15:02:17 +0100 Subject: [PATCH 0790/1418] wifi: ath9k: delay all of ath9k_wmi_event_tasklet() until init is complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 24355fcb0d4cbcb6ddda262596558e8cfba70f11 ] The ath9k_wmi_event_tasklet() used in ath9k_htc assumes that all the data structures have been fully initialised by the time it runs. However, because of the order in which things are initialised, this is not guaranteed to be the case, because the device is exposed to the USB subsystem before the ath9k driver initialisation is completed. We already committed a partial fix for this in commit: 8b3046abc99e ("ath9k_htc: fix NULL pointer dereference at ath9k_htc_tx_get_packet()") However, that commit only aborted the WMI_TXSTATUS_EVENTID command in the event tasklet, pairing it with an "initialisation complete" bit in the TX struct. It seems syzbot managed to trigger the race for one of the other commands as well, so let's just move the existing synchronisation bit to cover the whole tasklet (setting it at the end of ath9k_htc_probe_device() instead of inside ath9k_tx_init()). Link: https://lore.kernel.org/r/ed1d2c66-1193-4c81-9542-d514c29ba8b8.bugreport@ubisectech.com Fixes: 8b3046abc99e ("ath9k_htc: fix NULL pointer dereference at ath9k_htc_tx_get_packet()") Reported-by: Ubisectech Sirius Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://msgid.link/20240126140218.1033443-1-toke@toke.dk Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc.h | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 4 ++++ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 4 ---- drivers/net/wireless/ath/ath9k/wmi.c | 10 ++++++---- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 237f4ec2cffd7..6c33e898b3000 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -306,7 +306,6 @@ struct ath9k_htc_tx { DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM); struct timer_list cleanup_timer; spinlock_t tx_lock; - bool initialized; }; struct ath9k_htc_tx_ctl { @@ -515,6 +514,7 @@ struct ath9k_htc_priv { unsigned long ps_usecount; bool ps_enabled; bool ps_idle; + bool initialized; #ifdef CONFIG_MAC80211_LEDS enum led_brightness brightness; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 96a3185a96d75..b014185373f34 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -966,6 +966,10 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev, htc_handle->drv_priv = priv; + /* Allow ath9k_wmi_event_tasklet() to operate. */ + smp_wmb(); + priv->initialized = true; + return 0; err_init: diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index d6a3f001dacb9..2fdd27885f543 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -815,10 +815,6 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv) skb_queue_head_init(&priv->tx.data_vo_queue); skb_queue_head_init(&priv->tx.tx_failed); - /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */ - smp_wmb(); - priv->tx.initialized = true; - return 0; } diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 1476b42b52a91..805ad31edba2b 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -155,6 +155,12 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) } spin_unlock_irqrestore(&wmi->wmi_lock, flags); + /* Check if ath9k_htc_probe_device() completed. */ + if (!data_race(priv->initialized)) { + kfree_skb(skb); + continue; + } + hdr = (struct wmi_cmd_hdr *) skb->data; cmd_id = be16_to_cpu(hdr->command_id); wmi_event = skb_pull(skb, sizeof(struct wmi_cmd_hdr)); @@ -169,10 +175,6 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) &wmi->drv_priv->fatal_work); break; case WMI_TXSTATUS_EVENTID: - /* Check if ath9k_tx_init() completed. */ - if (!data_race(priv->tx.initialized)) - break; - spin_lock_bh(&priv->tx.tx_lock); if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) { spin_unlock_bh(&priv->tx.tx_lock); -- GitLab From 9cd961d993b3a52215b74b6c75059eff50078685 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Jan 2024 08:53:48 +0200 Subject: [PATCH 0791/1418] wifi: iwlwifi: mvm: report beacon protection failures [ Upstream commit 91380f768d7f6e3d003755defa792e9a00a1444a ] Andrei reports that we just silently drop beacons after we report the key counters, but never report to userspace, so wpa_supplicant cannot send the WNM action frame. Fix that. Fixes: b1fdc2505abc ("iwlwifi: mvm: advertise BIGTK client support if available") Reported-by: Andrei Otcheretianski Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240128084842.7d855442cdce.Iba90b26f893dc8c49bfb8be65373cd0a138af12c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index f268a31ce26d9..105f283b777d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -299,6 +299,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, u32 status, struct ieee80211_rx_status *stats) { + struct wireless_dev *wdev; struct iwl_mvm_sta *mvmsta; struct iwl_mvm_vif *mvmvif; u8 keyid; @@ -320,9 +321,15 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, if (!ieee80211_is_beacon(hdr->frame_control)) return 0; + if (!sta) + return -1; + + mvmsta = iwl_mvm_sta_from_mac80211(sta); + mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif); + /* key mismatch - will also report !MIC_OK but we shouldn't count it */ if (!(status & IWL_RX_MPDU_STATUS_KEY_VALID)) - return -1; + goto report; /* good cases */ if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK && @@ -331,13 +338,6 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, return 0; } - if (!sta) - return -1; - - mvmsta = iwl_mvm_sta_from_mac80211(sta); - - mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif); - /* * both keys will have the same cipher and MIC length, use * whichever one is available @@ -346,11 +346,11 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, if (!key) { key = rcu_dereference(mvmvif->bcn_prot.keys[1]); if (!key) - return -1; + goto report; } if (len < key->icv_len + IEEE80211_GMAC_PN_LEN + 2) - return -1; + goto report; /* get the real key ID */ keyid = frame[len - key->icv_len - IEEE80211_GMAC_PN_LEN - 2]; @@ -364,7 +364,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, return -1; key = rcu_dereference(mvmvif->bcn_prot.keys[keyid - 6]); if (!key) - return -1; + goto report; } /* Report status to mac80211 */ @@ -372,6 +372,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, ieee80211_key_mic_failure(key); else if (status & IWL_RX_MPDU_STATUS_REPLAY_ERROR) ieee80211_key_replay(key); +report: + wdev = ieee80211_vif_to_wdev(mvmsta->vif); + if (wdev->netdev) + cfg80211_rx_unprot_mlme_mgmt(wdev->netdev, (void *)hdr, len); return -1; } -- GitLab From c855a1a5b7e3de57e6b1b29563113d5e3bfdb89a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Jan 2024 08:53:53 +0200 Subject: [PATCH 0792/1418] wifi: iwlwifi: dbg-tlv: ensure NUL termination [ Upstream commit ea1d166fae14e05d49ffb0ea9fcd4658f8d3dcea ] The iwl_fw_ini_debug_info_tlv is used as a string, so we must ensure the string is terminated correctly before using it. Fixes: a9248de42464 ("iwlwifi: dbg_ini: add TLV allocation new API support") Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240128084842.be15e858ee89.Ibff93429cf999eafc7b26f3eef4c055dc84984a0@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 5979d904bbbd2..677c9e0b46f10 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -103,6 +103,12 @@ static int iwl_dbg_tlv_alloc_debug_info(struct iwl_trans *trans, if (le32_to_cpu(tlv->length) != sizeof(*debug_info)) return -EINVAL; + /* we use this as a string, ensure input was NUL terminated */ + if (strnlen(debug_info->debug_cfg_name, + sizeof(debug_info->debug_cfg_name)) == + sizeof(debug_info->debug_cfg_name)) + return -EINVAL; + IWL_DEBUG_FW(trans, "WRT: Loading debug cfg: %s\n", debug_info->debug_cfg_name); -- GitLab From 5666fe7b8518460572f65769cc3c7ef6262e4ca2 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 29 Jan 2024 21:21:49 +0200 Subject: [PATCH 0793/1418] wifi: iwlwifi: fix EWRD table validity check [ Upstream commit c8d8f3911135921ace8e939ea0956b55f74bf8a0 ] EWRD ACPI table contains up to 3 additional sar profiles. According to the BIOS spec, the table contains a n_profile variable indicating how many additional profiles exist in the table. Currently we check that n_profiles is not <= 0. But according to the BIOS spec, 0 is a valid value, and it can't be < 0 anyway because we receive that from ACPI as an unsigned integer. Fixes: 39c1a9728f93 ("iwlwifi: refactor the SAR tables from mvm to acpi") Signed-off-by: Miri Korenblit Reviewed-by: Gregory Greenman Link: https://msgid.link/20240129211905.448ea2f40814.Iffd2aadf8e8693e6cb599bee0406a800a0c1e081@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index f5fcc547de391..235963e1d7a9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -725,7 +725,7 @@ read_table: * from index 1, so the maximum value allowed here is * ACPI_SAR_PROFILES_NUM - 1. */ - if (n_profiles <= 0 || n_profiles >= ACPI_SAR_PROFILE_NUM) { + if (n_profiles >= ACPI_SAR_PROFILE_NUM) { ret = -EINVAL; goto out_free; } -- GitLab From d80bac49aebf5b7011188290e94ba16343c6078b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Wed, 24 Jan 2024 21:58:57 +0100 Subject: [PATCH 0794/1418] gpio: vf610: allow disabling the vf610 driver [ Upstream commit f57595788244a838deec2d3be375291327cbc035 ] The vf610 gpio driver is enabled by default for all i.MX machines, without any option to disable it in a board-specific config file. Most i.MX chipsets have no hardware for this driver. Change the default to enable GPIO_VF610 for SOC_VF610 and disable it otherwise. Add a text description after the bool type, this makes the driver selectable by make config etc. Fixes: 30a35c07d9e9 ("gpio: vf610: drop the SOC_VF610 dependency for GPIO_VF610") Signed-off-by: Martin Kaiser Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3e8e5f4ffa59f..700f71c954956 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -679,7 +679,8 @@ config GPIO_UNIPHIER Say yes here to support UniPhier GPIOs. config GPIO_VF610 - def_bool y + bool "VF610 GPIO support" + default y if SOC_VF610 depends on ARCH_MXC select GPIOLIB_IRQCHIP help -- GitLab From a5fd802a1f5554037eec49a48a2eebc810d4a39b Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 20 Dec 2023 15:30:46 -0800 Subject: [PATCH 0795/1418] arm64: dts: imx8mm-venice-gw71xx: fix USB OTG VBUS [ Upstream commit ec2cb52fcfef5d58574f2cfbc9a99ffc20ae5a9d ] The GW71xx does not have a gpio controlled vbus regulator but it does require some pinctrl. Remove the regulator and move the valid pinctrl into the usbotg1 node. Fixes: bd306fdb4e60 ("arm64: dts: imx8mm-venice-gw71xx: fix USB OTG VBUS") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mm-venice-gw71xx.dtsi | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi index c557dbf4dcd60..2e90466db89a0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi @@ -47,17 +47,6 @@ gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; status = "okay"; }; - - reg_usb_otg1_vbus: regulator-usb-otg1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_reg_usb1_en>; - compatible = "regulator-fixed"; - regulator-name = "usb_otg1_vbus"; - gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>; - enable-active-high; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - }; }; /* off-board header */ @@ -146,9 +135,10 @@ }; &usbotg1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; dr_mode = "otg"; over-current-active-low; - vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; @@ -206,14 +196,6 @@ >; }; - pinctrl_reg_usb1_en: regusb1grp { - fsl,pins = < - MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x41 - MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141 - MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41 - >; - }; - pinctrl_spi2: spi2grp { fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 @@ -236,4 +218,11 @@ MX8MM_IOMUXC_UART3_TXD_UART3_DCE_TX 0x140 >; }; + + pinctrl_usbotg1: usbotg1grp { + fsl,pins = < + MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141 + MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41 + >; + }; }; -- GitLab From 992cbc89b0ffe20bc906b6ebb0dfda6353476b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 3 Mar 2023 19:54:16 +0100 Subject: [PATCH 0796/1418] pwm: atmel-hlcdc: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5fce94170ad8a67b839f3dd8e8e8a87039ba0251 ] The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Claudiu Beznea Signed-off-by: Thierry Reding Stable-dep-of: e25ac87d3f83 ("pwm: atmel-hlcdc: Fix clock imbalance related to suspend support") Signed-off-by: Sasha Levin --- drivers/pwm/pwm-atmel-hlcdc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index a43b2babc8093..96a709a9d49a8 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -278,15 +278,13 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) return 0; } -static int atmel_hlcdc_pwm_remove(struct platform_device *pdev) +static void atmel_hlcdc_pwm_remove(struct platform_device *pdev) { struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); pwmchip_remove(&chip->chip); clk_disable_unprepare(chip->hlcdc->periph_clk); - - return 0; } static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { @@ -301,7 +299,7 @@ static struct platform_driver atmel_hlcdc_pwm_driver = { .pm = &atmel_hlcdc_pwm_pm_ops, }, .probe = atmel_hlcdc_pwm_probe, - .remove = atmel_hlcdc_pwm_remove, + .remove_new = atmel_hlcdc_pwm_remove, }; module_platform_driver(atmel_hlcdc_pwm_driver); -- GitLab From 78b8952e1df233ae68e1dcc838272e65e3705232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 14 Jul 2023 22:56:15 +0200 Subject: [PATCH 0797/1418] pwm: atmel-hlcdc: Use consistent variable naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aecab554b6ffa9a94ba796031eb39ea20eb60fb3 ] In PWM drivers the variable name "chip" is usually only used for struct pwm_chip pointers. This driver however used "chip" for its driver data and pwm_chip pointers are named "chip", too, when there is no driver data around and "c" otherwise. Instead use "atmel" for driver data and always "chip" for pwm_chips. Signed-off-by: Uwe Kleine-König Reviewed-by: Claudiu Beznea [thierry.reding@gmail.com: replace ddata by atmel] Signed-off-by: Thierry Reding Stable-dep-of: e25ac87d3f83 ("pwm: atmel-hlcdc: Fix clock imbalance related to suspend support") Signed-off-by: Sasha Levin --- drivers/pwm/pwm-atmel-hlcdc.c | 65 ++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 96a709a9d49a8..4d0b859d0ac13 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -38,11 +38,11 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) return container_of(chip, struct atmel_hlcdc_pwm, chip); } -static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, +static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); - struct atmel_hlcdc *hlcdc = chip->hlcdc; + struct atmel_hlcdc_pwm *atmel = to_atmel_hlcdc_pwm(chip); + struct atmel_hlcdc *hlcdc = atmel->hlcdc; unsigned int status; int ret; @@ -54,7 +54,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, u32 pwmcfg; int pres; - if (!chip->errata || !chip->errata->slow_clk_erratum) { + if (!atmel->errata || !atmel->errata->slow_clk_erratum) { clk_freq = clk_get_rate(new_clk); if (!clk_freq) return -EINVAL; @@ -64,7 +64,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, } /* Errata: cannot use slow clk on some IP revisions */ - if ((chip->errata && chip->errata->slow_clk_erratum) || + if ((atmel->errata && atmel->errata->slow_clk_erratum) || clk_period_ns > state->period) { new_clk = hlcdc->sys_clk; clk_freq = clk_get_rate(new_clk); @@ -77,8 +77,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) { /* Errata: cannot divide by 1 on some IP revisions */ - if (!pres && chip->errata && - chip->errata->div1_clk_erratum) + if (!pres && atmel->errata && + atmel->errata->div1_clk_erratum) continue; if ((clk_period_ns << pres) >= state->period) @@ -90,7 +90,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, pwmcfg = ATMEL_HLCDC_PWMPS(pres); - if (new_clk != chip->cur_clk) { + if (new_clk != atmel->cur_clk) { u32 gencfg = 0; int ret; @@ -98,8 +98,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, if (ret) return ret; - clk_disable_unprepare(chip->cur_clk); - chip->cur_clk = new_clk; + clk_disable_unprepare(atmel->cur_clk); + atmel->cur_clk = new_clk; if (new_clk == hlcdc->sys_clk) gencfg = ATMEL_HLCDC_CLKPWMSEL; @@ -160,8 +160,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, if (ret) return ret; - clk_disable_unprepare(chip->cur_clk); - chip->cur_clk = NULL; + clk_disable_unprepare(atmel->cur_clk); + atmel->cur_clk = NULL; } return 0; @@ -183,31 +183,32 @@ static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = { #ifdef CONFIG_PM_SLEEP static int atmel_hlcdc_pwm_suspend(struct device *dev) { - struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev); + struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); /* Keep the periph clock enabled if the PWM is still running. */ - if (pwm_is_enabled(&chip->chip.pwms[0])) - clk_disable_unprepare(chip->hlcdc->periph_clk); + if (pwm_is_enabled(&atmel->chip.pwms[0])) + clk_disable_unprepare(atmel->hlcdc->periph_clk); return 0; } static int atmel_hlcdc_pwm_resume(struct device *dev) { - struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev); + struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); struct pwm_state state; int ret; - pwm_get_state(&chip->chip.pwms[0], &state); + pwm_get_state(&atmel->chip.pwms[0], &state); /* Re-enable the periph clock it was stopped during suspend. */ if (!state.enabled) { - ret = clk_prepare_enable(chip->hlcdc->periph_clk); + ret = clk_prepare_enable(atmel->hlcdc->periph_clk); if (ret) return ret; } - return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state); + return atmel_hlcdc_pwm_apply(&atmel->chip, &atmel->chip.pwms[0], + &state); } #endif @@ -244,14 +245,14 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) { const struct of_device_id *match; struct device *dev = &pdev->dev; - struct atmel_hlcdc_pwm *chip; + struct atmel_hlcdc_pwm *atmel; struct atmel_hlcdc *hlcdc; int ret; hlcdc = dev_get_drvdata(dev->parent); - chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); - if (!chip) + atmel = devm_kzalloc(dev, sizeof(*atmel), GFP_KERNEL); + if (!atmel) return -ENOMEM; ret = clk_prepare_enable(hlcdc->periph_clk); @@ -260,31 +261,31 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node); if (match) - chip->errata = match->data; + atmel->errata = match->data; - chip->hlcdc = hlcdc; - chip->chip.ops = &atmel_hlcdc_pwm_ops; - chip->chip.dev = dev; - chip->chip.npwm = 1; + atmel->hlcdc = hlcdc; + atmel->chip.ops = &atmel_hlcdc_pwm_ops; + atmel->chip.dev = dev; + atmel->chip.npwm = 1; - ret = pwmchip_add(&chip->chip); + ret = pwmchip_add(&atmel->chip); if (ret) { clk_disable_unprepare(hlcdc->periph_clk); return ret; } - platform_set_drvdata(pdev, chip); + platform_set_drvdata(pdev, atmel); return 0; } static void atmel_hlcdc_pwm_remove(struct platform_device *pdev) { - struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); + struct atmel_hlcdc_pwm *atmel = platform_get_drvdata(pdev); - pwmchip_remove(&chip->chip); + pwmchip_remove(&atmel->chip); - clk_disable_unprepare(chip->hlcdc->periph_clk); + clk_disable_unprepare(atmel->hlcdc->periph_clk); } static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { -- GitLab From ecab386a8edb4f6b06dd089882cc3a2b0d8c0c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 26 Jan 2024 13:04:33 +0100 Subject: [PATCH 0798/1418] pwm: atmel-hlcdc: Fix clock imbalance related to suspend support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e25ac87d3f831fed002c34aadddaf4ebb4ea45ec ] The suspend callback disables the periph clock when the PWM is enabled and resume reenables this clock if the PWM was disabled before. Judging from the code comment it's suspend that is wrong here. Fix accordingly. Fixes: f9bb9da7c09d ("pwm: atmel-hlcdc: Implement the suspend/resume hooks") Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/b51ea92b0a45eff3dc83b08adefd43d930df996c.1706269232.git.u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/pwm/pwm-atmel-hlcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 4d0b859d0ac13..3e9c94a8d7f72 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -186,7 +186,7 @@ static int atmel_hlcdc_pwm_suspend(struct device *dev) struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); /* Keep the periph clock enabled if the PWM is still running. */ - if (pwm_is_enabled(&atmel->chip.pwms[0])) + if (!pwm_is_enabled(&atmel->chip.pwms[0])) clk_disable_unprepare(atmel->hlcdc->periph_clk); return 0; -- GitLab From b4bb2291d6d4d54fea9f2c5d7b1e744ce0dac33d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 2 Feb 2024 07:13:29 -0800 Subject: [PATCH 0799/1418] net: blackhole_dev: fix build warning for ethh set but not used [ Upstream commit 843a8851e89e2e85db04caaf88d8554818319047 ] lib/test_blackhole_dev.c sets a variable that is never read, causing this following building warning: lib/test_blackhole_dev.c:32:17: warning: variable 'ethh' set but not used [-Wunused-but-set-variable] Remove the variable struct ethhdr *ethh, which is unused. Fixes: 509e56b37cc3 ("blackhole_dev: add a selftest") Signed-off-by: Breno Leitao Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- lib/test_blackhole_dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c index 4c40580a99a36..f247089d63c08 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/test_blackhole_dev.c @@ -29,7 +29,6 @@ static int __init test_blackholedev_init(void) { struct ipv6hdr *ip6h; struct sk_buff *skb; - struct ethhdr *ethh; struct udphdr *uh; int data_len; int ret; @@ -61,7 +60,7 @@ static int __init test_blackholedev_init(void) ip6h->saddr = in6addr_loopback; ip6h->daddr = in6addr_loopback; /* Ether */ - ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr)); + skb_push(skb, sizeof(struct ethhdr)); skb_set_mac_header(skb, 0); skb->protocol = htons(ETH_P_IPV6); -- GitLab From 71cdbd1fcbe7034b077ed1509f26fd5317f49130 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 2 Feb 2024 10:35:47 +0800 Subject: [PATCH 0800/1418] wifi: ath11k: initialize rx_mcs_80 and rx_mcs_160 before use [ Upstream commit b802e7b7e771dee3377d071418281f8b64d2d832 ] Currently in ath11k_peer_assoc_h_he() rx_mcs_80 and rx_mcs_160 are used to calculate max_nss, see if (support_160) max_nss = min(rx_mcs_80, rx_mcs_160); else max_nss = rx_mcs_80; Kernel test robot complains on uninitialized symbols: drivers/net/wireless/ath/ath11k/mac.c:2321 ath11k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_80'. drivers/net/wireless/ath/ath11k/mac.c:2321 ath11k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_160'. drivers/net/wireless/ath/ath11k/mac.c:2323 ath11k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_80'. This is because there are some code paths that never set them, so the assignment of max_nss can come from uninitialized variables. This could result in some unknown issues since a wrong peer_nss might be passed to firmware. Change to initialize them to an invalid value at the beginning. This makes sense because even max_nss gets an invalid value, due to either or both of them being invalid, we can get an valid peer_nss with following guard: arg->peer_nss = min(sta->deflink.rx_nss, max_nss) Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.23 Fixes: 3db26ecf7114 ("ath11k: calculate the correct NSS of peer for HE capabilities") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401311243.NyXwWZxP-lkp@intel.com/ Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240202023547.11141-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 21c6b36dc6ebb..51fc77e93de5c 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2112,6 +2112,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); + /* Initialize rx_mcs_160 to 9 which is an invalid value */ + rx_mcs_160 = 9; if (support_160) { for (i = 7; i >= 0; i--) { u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3; @@ -2123,6 +2125,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, } } + /* Initialize rx_mcs_80 to 9 which is an invalid value */ + rx_mcs_80 = 9; for (i = 7; i >= 0; i--) { u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3; -- GitLab From 4d99d267da3415db2124029cb5a6d2d955ca43f9 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 26 Jan 2024 15:53:34 +0800 Subject: [PATCH 0801/1418] wifi: libertas: fix some memleaks in lbs_allocate_cmd_buffer() [ Upstream commit 5f0e4aede01cb01fa633171f0533affd25328c3a ] In the for statement of lbs_allocate_cmd_buffer(), if the allocation of cmdarray[i].cmdbuf fails, both cmdarray and cmdarray[i].cmdbuf needs to be freed. Otherwise, there will be memleaks in lbs_allocate_cmd_buffer(). Fixes: 876c9d3aeb98 ("[PATCH] Marvell Libertas 8388 802.11b/g USB driver") Signed-off-by: Zhipeng Lu Signed-off-by: Kalle Valo Link: https://msgid.link/20240126075336.2825608-1-alexious@zju.edu.cn Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/cmd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cmd.c b/drivers/net/wireless/marvell/libertas/cmd.c index 104d2b6dc9af6..5a525da434c28 100644 --- a/drivers/net/wireless/marvell/libertas/cmd.c +++ b/drivers/net/wireless/marvell/libertas/cmd.c @@ -1132,7 +1132,7 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv) if (!cmdarray[i].cmdbuf) { lbs_deb_host("ALLOC_CMD_BUF: ptempvirtualaddr is NULL\n"); ret = -1; - goto done; + goto free_cmd_array; } } @@ -1140,8 +1140,17 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv) init_waitqueue_head(&cmdarray[i].cmdwait_q); lbs_cleanup_and_insert_cmd(priv, &cmdarray[i]); } - ret = 0; + return 0; +free_cmd_array: + for (i = 0; i < LBS_NUM_CMD_BUFFERS; i++) { + if (cmdarray[i].cmdbuf) { + kfree(cmdarray[i].cmdbuf); + cmdarray[i].cmdbuf = NULL; + } + } + kfree(priv->cmd_array); + priv->cmd_array = NULL; done: return ret; } -- GitLab From a1f57a0127b89a6b6620514564aa7eaec16d9af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Fri, 2 Feb 2024 17:42:13 +0100 Subject: [PATCH 0802/1418] wifi: wfx: fix memory leak when starting AP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b8cfb7c819dd39965136a66fe3a7fde688d976fc ] Kmemleak reported this error: unreferenced object 0xd73d1180 (size 184): comm "wpa_supplicant", pid 1559, jiffies 13006305 (age 964.245s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 1e 00 01 00 00 00 00 00 ................ backtrace: [<5ca11420>] kmem_cache_alloc+0x20c/0x5ac [<127bdd74>] __alloc_skb+0x144/0x170 [] __netdev_alloc_skb+0x50/0x180 [<0f9fa1d5>] __ieee80211_beacon_get+0x290/0x4d4 [mac80211] [<7accd02d>] ieee80211_beacon_get_tim+0x54/0x18c [mac80211] [<41e25cc3>] wfx_start_ap+0xc8/0x234 [wfx] [<93a70356>] ieee80211_start_ap+0x404/0x6b4 [mac80211] [] nl80211_start_ap+0x76c/0x9e0 [cfg80211] [<47bd8b68>] genl_rcv_msg+0x198/0x378 [<453ef796>] netlink_rcv_skb+0xd0/0x130 [<6b7c977a>] genl_rcv+0x34/0x44 [<66b2d04d>] netlink_unicast+0x1b4/0x258 [] netlink_sendmsg+0x1e8/0x428 [] ____sys_sendmsg+0x1e0/0x274 [] ___sys_sendmsg+0x80/0xb4 [<69954f45>] __sys_sendmsg+0x64/0xa8 unreferenced object 0xce087000 (size 1024): comm "wpa_supplicant", pid 1559, jiffies 13006305 (age 964.246s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 10 00 07 40 00 00 00 00 00 00 00 00 00 00 00 00 ...@............ backtrace: [<9a993714>] __kmalloc_track_caller+0x230/0x600 [] kmalloc_reserve.constprop.0+0x30/0x74 [] __alloc_skb+0xa0/0x170 [] __netdev_alloc_skb+0x50/0x180 [<0f9fa1d5>] __ieee80211_beacon_get+0x290/0x4d4 [mac80211] [<7accd02d>] ieee80211_beacon_get_tim+0x54/0x18c [mac80211] [<41e25cc3>] wfx_start_ap+0xc8/0x234 [wfx] [<93a70356>] ieee80211_start_ap+0x404/0x6b4 [mac80211] [] nl80211_start_ap+0x76c/0x9e0 [cfg80211] [<47bd8b68>] genl_rcv_msg+0x198/0x378 [<453ef796>] netlink_rcv_skb+0xd0/0x130 [<6b7c977a>] genl_rcv+0x34/0x44 [<66b2d04d>] netlink_unicast+0x1b4/0x258 [] netlink_sendmsg+0x1e8/0x428 [] ____sys_sendmsg+0x1e0/0x274 [] ___sys_sendmsg+0x80/0xb4 However, since the kernel is build optimized, it seems the stack is not accurate. It appears the issue is related to wfx_set_mfp_ap(). The issue is obvious in this function: memory allocated by ieee80211_beacon_get() is never released. Fixing this leak makes kmemleak happy. Reported-by: Ulrich Mohr Co-developed-by: Ulrich Mohr Signed-off-by: Ulrich Mohr Fixes: 268bceec1684 ("staging: wfx: fix BA when device is AP and MFP is enabled") Signed-off-by: Jérôme Pouiller Signed-off-by: Kalle Valo Link: https://msgid.link/20240202164213.1606145-1-jerome.pouiller@silabs.com Signed-off-by: Sasha Levin --- drivers/net/wireless/silabs/wfx/sta.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index 073e870b26415..871667650dbef 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -362,6 +362,7 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif) const int pairwise_cipher_suite_count_offset = 8 / sizeof(u16); const int pairwise_cipher_suite_size = 4 / sizeof(u16); const int akm_suite_size = 4 / sizeof(u16); + int ret = -EINVAL; const u16 *ptr; if (unlikely(!skb)) @@ -370,22 +371,26 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif) ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, skb->len - ieoffset); if (unlikely(!ptr)) - return -EINVAL; + goto free_skb; ptr += pairwise_cipher_suite_count_offset; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return -EINVAL; + goto free_skb; ptr += 1 + pairwise_cipher_suite_size * *ptr; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return -EINVAL; + goto free_skb; ptr += 1 + akm_suite_size * *ptr; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return -EINVAL; + goto free_skb; wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6)); - return 0; + ret = 0; + +free_skb: + dev_kfree_skb(skb); + return ret; } int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, -- GitLab From d73b916bcc62d4b745728449e6e3343dce47476f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 7 Feb 2024 14:47:00 +0106 Subject: [PATCH 0803/1418] printk: Disable passing console lock owner completely during panic() [ Upstream commit d04d5882cd678b898a9d7c5aee6afbe9e6e77fcd ] The commit d51507098ff91 ("printk: disable optimistic spin during panic") added checks to avoid becoming a console waiter if a panic is in progress. However, the transition to panic can occur while there is already a waiter. The current owner should not pass the lock to the waiter because it might get stopped or blocked anytime. Also the panic context might pass the console lock owner to an already stopped waiter by mistake. It might happen when console_flush_on_panic() ignores the current lock owner, for example: CPU0 CPU1 ---- ---- console_lock_spinning_enable() console_trylock_spinning() [CPU1 now console waiter] NMI: panic() panic_other_cpus_shutdown() [stopped as console waiter] console_flush_on_panic() console_lock_spinning_enable() [print 1 record] console_lock_spinning_disable_and_check() [handover to stopped CPU1] This results in panic() not flushing the panic messages. Fix these problems by disabling all spinning operations completely during panic(). Another advantage is that it prevents possible deadlocks caused by "console_owner_lock". The panic() context does not need to take it any longer. The lockless checks are safe because the functions become NOPs when they see the panic in progress. All operations manipulating the state are still synchronized by the lock even when non-panic CPUs would notice the panic synchronously. The current owner might stay spinning. But non-panic() CPUs would get stopped anyway and the panic context will never start spinning. Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20240207134103.1357162-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index cc53fb77f77cc..981cdb00b8722 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1797,10 +1797,23 @@ static bool console_waiter; */ static void console_lock_spinning_enable(void) { + /* + * Do not use spinning in panic(). The panic CPU wants to keep the lock. + * Non-panic CPUs abandon the flush anyway. + * + * Just keep the lockdep annotation. The panic-CPU should avoid + * taking console_owner_lock because it might cause a deadlock. + * This looks like the easiest way how to prevent false lockdep + * reports without handling races a lockless way. + */ + if (panic_in_progress()) + goto lockdep; + raw_spin_lock(&console_owner_lock); console_owner = current; raw_spin_unlock(&console_owner_lock); +lockdep: /* The waiter may spin on us after setting console_owner */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); } @@ -1824,6 +1837,22 @@ static int console_lock_spinning_disable_and_check(void) { int waiter; + /* + * Ignore spinning waiters during panic() because they might get stopped + * or blocked at any time, + * + * It is safe because nobody is allowed to start spinning during panic + * in the first place. If there has been a waiter then non panic CPUs + * might stay spinning. They would get stopped anyway. The panic context + * will never start spinning and an interrupted spin on panic CPU will + * never continue. + */ + if (panic_in_progress()) { + /* Keep lockdep happy. */ + spin_release(&console_owner_dep_map, _THIS_IP_); + return 0; + } + raw_spin_lock(&console_owner_lock); waiter = READ_ONCE(console_waiter); console_owner = NULL; -- GitLab From 23f96f86de86c4416b75471b0eee3d883305b4c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 4 Feb 2024 22:20:43 +0100 Subject: [PATCH 0804/1418] pwm: sti: Fix capture for st,pwm-num-chan < st,capture-num-chan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f623835584f1c8d1030666796f40c47a448ce0b ] The driver only used the number of pwm channels to set the pwm_chip's npwm member. The result is that if there are more capture channels than PWM channels specified in the device tree, only a part of the capture channel is usable. Fix that by passing the bigger channel count to the pwm framework. This makes it possible that the .apply() callback is called with .hwpwm >= pwm_num_devs, catch that case and return an error code. Fixes: c97267ae831d ("pwm: sti: Add PWM capture callback") Link: https://lore.kernel.org/r/20240204212043.2951852-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/pwm/pwm-sti.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 652fdb8dc7bfa..0a7920cbd4949 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -395,8 +395,17 @@ out: static int sti_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { + struct sti_pwm_chip *pc = to_sti_pwmchip(chip); + struct sti_pwm_compat_data *cdata = pc->cdata; + struct device *dev = pc->dev; int err; + if (pwm->hwpwm >= cdata->pwm_num_devs) { + dev_err(dev, "device %u is not valid for pwm mode\n", + pwm->hwpwm); + return -EINVAL; + } + if (state->polarity != PWM_POLARITY_NORMAL) return -EINVAL; @@ -647,7 +656,7 @@ static int sti_pwm_probe(struct platform_device *pdev) pc->chip.dev = dev; pc->chip.ops = &sti_pwm_ops; - pc->chip.npwm = pc->cdata->pwm_num_devs; + pc->chip.npwm = max(cdata->pwm_num_devs, cdata->cpt_num_devs); for (i = 0; i < cdata->cpt_num_devs; i++) { struct sti_cpt_ddata *ddata = &cdata->ddata[i]; -- GitLab From 0697d4862d961c867e8a74ce9031e170664178a5 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 6 Feb 2024 13:46:09 +0100 Subject: [PATCH 0805/1418] tools/resolve_btfids: Refactor set sorting with types from btf_ids.h [ Upstream commit 9707ac4fe2f5bac6406d2403f8b8a64d7b3d8e43 ] Instead of using magic offsets to access BTF ID set data, leverage types from btf_ids.h (btf_id_set and btf_id_set8) which define the actual layout of the data. Thanks to this change, set sorting should also continue working if the layout changes. This requires to sync the definition of 'struct btf_id_set8' from include/linux/btf_ids.h to tools/include/linux/btf_ids.h. We don't sync the rest of the file at the moment, b/c that would require to also sync multiple dependent headers and we don't need any other defs from btf_ids.h. Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Acked-by: Daniel Xu Link: https://lore.kernel.org/bpf/ff7f062ddf6a00815fda3087957c4ce667f50532.1707223196.git.vmalik@redhat.com Stable-dep-of: 903fad439466 ("tools/resolve_btfids: Fix cross-compilation to non-host endianness") Signed-off-by: Sasha Levin --- tools/bpf/resolve_btfids/main.c | 35 ++++++++++++++++++++------------- tools/include/linux/btf_ids.h | 9 +++++++++ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 77058174082d7..cd42977c6a1f4 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ #include #define BTF_IDS_SECTION ".BTF_ids" -#define BTF_ID "__BTF_ID__" +#define BTF_ID_PREFIX "__BTF_ID__" #define BTF_STRUCT "struct" #define BTF_UNION "union" @@ -161,7 +162,7 @@ static int eprintf(int level, int var, const char *fmt, ...) static bool is_btf_id(const char *name) { - return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); + return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); } static struct btf_id *btf_id__find(struct rb_root *root, const char *name) @@ -441,7 +442,7 @@ static int symbols_collect(struct object *obj) * __BTF_ID__TYPE__vfs_truncate__0 * prefix = ^ */ - prefix = name + sizeof(BTF_ID) - 1; + prefix = name + sizeof(BTF_ID_PREFIX) - 1; /* struct */ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { @@ -649,19 +650,18 @@ static int cmp_id(const void *pa, const void *pb) static int sets_patch(struct object *obj) { Elf_Data *data = obj->efile.idlist; - int *ptr = data->d_buf; struct rb_node *next; next = rb_first(&obj->sets); while (next) { - unsigned long addr, idx; + struct btf_id_set8 *set8; + struct btf_id_set *set; + unsigned long addr, off; struct btf_id *id; - int *base; - int cnt; id = rb_entry(next, struct btf_id, rb_node); addr = id->addr[0]; - idx = addr - obj->efile.idlist_addr; + off = addr - obj->efile.idlist_addr; /* sets are unique */ if (id->addr_cnt != 1) { @@ -670,14 +670,21 @@ static int sets_patch(struct object *obj) return -1; } - idx = idx / sizeof(int); - base = &ptr[idx] + (id->is_set8 ? 2 : 1); - cnt = ptr[idx]; + if (id->is_set) { + set = data->d_buf + off; + qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); + } else { + set8 = data->d_buf + off; + /* + * Make sure id is at the beginning of the pairs + * struct, otherwise the below qsort would not work. + */ + BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); + qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", - (idx + 1) * sizeof(int), cnt, id->name); - - qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); + off, id->is_set ? set->cnt : set8->cnt, id->name); next = rb_next(next); } diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 2f882d5cb30f5..72535f00572f6 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ -- GitLab From b4907fb68d45a59cfb701ffde3da7cf92d0b9b5e Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 6 Feb 2024 13:46:10 +0100 Subject: [PATCH 0806/1418] tools/resolve_btfids: Fix cross-compilation to non-host endianness [ Upstream commit 903fad4394666bc23975c93fb58f137ce64b5192 ] The .BTF_ids section is pre-filled with zeroed BTF ID entries during the build and afterwards patched by resolve_btfids with correct values. Since resolve_btfids always writes in host-native endianness, it relies on libelf to do the translation when the target ELF is cross-compiled to a different endianness (this was introduced in commit 61e8aeda9398 ("bpf: Fix libelf endian handling in resolv_btfids")). Unfortunately, the translation will corrupt the flags fields of SET8 entries because these were written during vmlinux compilation and are in the correct endianness already. This will lead to numerous selftests failures such as: $ sudo ./test_verifier 502 502 #502/p sleepable fentry accept FAIL Failed to load prog 'Invalid argument'! bpf_fentry_test1 is not sleepable verification time 34 usec stack depth 0 processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 Summary: 0 PASSED, 0 SKIPPED, 1 FAILED Since it's not possible to instruct libelf to translate just certain values, let's manually bswap the flags (both global and entry flags) in resolve_btfids when needed, so that libelf then translates everything correctly. Fixes: ef2c6f370a63 ("tools/resolve_btfids: Add support for 8-byte BTF sets") Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/7b6bff690919555574ce0f13d2a5996cacf7bf69.1707223196.git.vmalik@redhat.com Signed-off-by: Sasha Levin --- tools/bpf/resolve_btfids/main.c | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index cd42977c6a1f4..ef0764d6891e4 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -90,6 +90,14 @@ #define ADDR_CNT 100 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATANATIVE ELFDATA2MSB +#else +# error "Unknown machine endianness!" +#endif + struct btf_id { struct rb_node rb_node; char *name; @@ -117,6 +125,7 @@ struct object { int idlist_shndx; size_t strtabidx; unsigned long idlist_addr; + int encoding; } efile; struct rb_root sets; @@ -320,6 +329,7 @@ static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; size_t shdrstrndx; + GElf_Ehdr ehdr; int idx = 0; Elf *elf; int fd; @@ -351,6 +361,13 @@ static int elf_collect(struct object *obj) return -1; } + if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { + pr_err("FAILED cannot get ELF header: %s\n", + elf_errmsg(-1)); + return -1; + } + obj->efile.encoding = ehdr.e_ident[EI_DATA]; + /* * Scan all the elf sections and look for save data * from .BTF_ids section and symbols. @@ -681,6 +698,24 @@ static int sets_patch(struct object *obj) */ BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + + /* + * When ELF endianness does not match endianness of the + * host, libelf will do the translation when updating + * the ELF. This, however, corrupts SET8 flags which are + * already in the target endianness. So, let's bswap + * them to the host endianness and libelf will then + * correctly translate everything. + */ + if (obj->efile.encoding != ELFDATANATIVE) { + int i; + + set8->flags = bswap_32(set8->flags); + for (i = 0; i < set8->cnt; i++) { + set8->pairs[i].flags = + bswap_32(set8->pairs[i].flags); + } + } } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", -- GitLab From 437af288ec7bbe5fe46b9c1e37fcb66c1859e228 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 18:02:09 +0200 Subject: [PATCH 0807/1418] wifi: iwlwifi: mvm: don't set replay counters to 0xff [ Upstream commit d5bd4041cd70faf26fc9a54bd6f172537bbe77f3 ] The firmware (later) actually uses the values even for keys that are invalid as far as the host is concerned, later in rekeying, and then only sets the low 48 bits since the PNs are only 48 bits over the air. It does, however, compare the full 64 bits later, obviously causing problems. Remove the memset and use kzalloc instead to avoid any old heap data leaking to the firmware. We already init all the other fields in the struct anyway. This leaves the data set to zero for any unused fields, so the firmware can look at them safely even if they're not used right now. Fixes: 79e561f0f05a ("iwlwifi: mvm: d3: implement RSC command version 5") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206175739.462101146fef.I10f3855b99417af4247cff04af78dcbc6cb75c9c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 2748459d12279..88f4f429d875c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -461,12 +461,10 @@ static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm, struct wowlan_key_rsc_v5_data data = {}; int i; - data.rsc = kmalloc(sizeof(*data.rsc), GFP_KERNEL); + data.rsc = kzalloc(sizeof(*data.rsc), GFP_KERNEL); if (!data.rsc) return -ENOMEM; - memset(data.rsc, 0xff, sizeof(*data.rsc)); - for (i = 0; i < ARRAY_SIZE(data.rsc->mcast_key_id_map); i++) data.rsc->mcast_key_id_map[i] = IWL_MCAST_KEY_MAP_INVALID; -- GitLab From 8a2f812b4bfb67821489935105ca294db601f853 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 18 Jan 2024 13:03:39 +0100 Subject: [PATCH 0808/1418] s390/pai: fix attr_event_free upper limit for pai device drivers [ Upstream commit 225d09d6e5f3870560665a1829d2db79330b4c58 ] When the device drivers are initialized, a sysfs directory is created. This contains many attributes which are allocated with kzalloc(). Should it fail, the memory for the attributes already created is freed in attr_event_free(). Its second parameter is number of attribute elements to delete. This parameter is off by one. When i. e. the 10th attribute fails to get created, attributes numbered 0 to 9 should be deleted. Currently only attributes numbered 0 to 8 are deleted. Fixes: 39d62336f5c1 ("s390/pai: add support for cryptography counters") Reported-by: Sumanth Korikkar Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_pai_crypto.c | 2 +- arch/s390/kernel/perf_pai_ext.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 6826e2a69a216..f61a652046cfb 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -647,7 +647,7 @@ static int __init attr_event_init(void) for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) { ret = attr_event_init_one(attrs, i); if (ret) { - attr_event_free(attrs, i - 1); + attr_event_free(attrs, i); return ret; } } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 74b53c531e0cd..b4d89654183a2 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -612,7 +612,7 @@ static int __init attr_event_init(void) for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { ret = attr_event_init_one(attrs, i); if (ret) { - attr_event_free(attrs, i - 1); + attr_event_free(attrs, i); return ret; } } -- GitLab From 9b2ca91f6eac4488385454601531a98933f1da5a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Jan 2024 20:14:28 -0700 Subject: [PATCH 0809/1418] s390/vdso: drop '-fPIC' from LDFLAGS [ Upstream commit 0628c03934187be33942580e10bb9afcc61adeed ] '-fPIC' as an option to the linker does not do what it seems like it should. With ld.bfd, it is treated as '-f PIC', which does not make sense based on the meaning of '-f': -f SHLIB, --auxiliary SHLIB Auxiliary filter for shared object symbol table When building with ld.lld (currently under review in a GitHub pull request), it just errors out because '-f' means nothing and neither does '-fPIC': ld.lld: error: unknown argument '-fPIC' '-fPIC' was blindly copied from CFLAGS when the vDSO stopped being linked with '$(CC)', it should not be needed. Remove it to clear up the build failure with ld.lld. Fixes: 2b2a25845d53 ("s390/vdso: Use $(LD) instead of $(CC) to link vDSO") Link: https://github.com/llvm/llvm-project/pull/75643 Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Link: https://lore.kernel.org/r/20240130-s390-vdso-drop-fpic-from-ldflags-v1-1-094ad104fc55@kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/vdso32/Makefile | 2 +- arch/s390/kernel/vdso64/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 245bddfe9bc0e..cc513add48eb5 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ +LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \ --hash-style=both --build-id=sha1 -melf_s390 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 1605ba45ac4c0..42d918d50a1ff 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -26,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ +ldflags-y := -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) -- GitLab From 4e58093897c90253752b9332bd85814c73c9e98a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 26 Jan 2024 17:36:16 +0100 Subject: [PATCH 0810/1418] selftests: forwarding: Add missing config entries [ Upstream commit 4acf4e62cd572b0c806035046b3698f5585ab821 ] The config file contains a partial kernel configuration to be used by `virtme-configkernel --custom'. The presumption is that the config file contains all Kconfig options needed by the selftests from the directory. In net/forwarding/config, many are missing, which manifests as spurious failures when running the selftests, with messages about unknown device types, qdisc kinds or classifier actions. Add the missing configurations. Tested the resulting configuration using virtme-ng as follows: # vng -b -f tools/testing/selftests/net/forwarding/config # vng --user root (within the VM:) # make -C tools/testing/selftests TARGETS=net/forwarding run_tests Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/025abded7ff9cea5874a7fe35dcd3fd41bf5e6ac.1706286755.git.petrm@nvidia.com Signed-off-by: Paolo Abeni Stable-dep-of: f0ddf15f0a74 ("selftests: forwarding: Add missing multicast routing config entries") Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/config | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 697994a9278bb..ba23435145827 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,14 +6,42 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_IPV6=y +CONFIG_IPV6_GRE=m +CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_META=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPIP=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_VETH=m CONFIG_NAMESPACES=y CONFIG_NET_NS=y +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m -- GitLab From 21af11fcb03f92decc0e0701b429f92044138129 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 18:55:38 +0200 Subject: [PATCH 0811/1418] selftests: forwarding: Add missing multicast routing config entries [ Upstream commit f0ddf15f0a74c27eb4b2271a90e69948acc3fa2c ] The two tests that make use of multicast routig (router.sh and router_multicast.sh) are currently failing in the netdev CI because the kernel is missing multicast routing support. Fix by adding the required config entries. Fixes: 6d4efada3b82 ("selftests: forwarding: Add multicast routing test") Signed-off-by: Ido Schimmel Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208165538.1303021-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index ba23435145827..8d7a1a004b7c3 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -9,6 +9,13 @@ CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_IPV6=y CONFIG_IPV6_GRE=m +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m -- GitLab From a03ede2282ebbd181bd6f5c38cbfcb5765afcd04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:30:56 +0000 Subject: [PATCH 0812/1418] ipv6: mcast: remove one synchronize_net() barrier in ipv6_mc_down() [ Upstream commit 17ef8efc00b34918b966388b2af0993811895a8c ] As discussed in the past (commit 2d3916f31891 ("ipv6: fix skb drops in igmp6_event_query() and igmp6_event_report()")) I think the synchronize_net() call in ipv6_mc_down() is not needed. Under load, synchronize_net() can last between 200 usec and 5 ms. KASAN seems to agree as well. Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") Signed-off-by: Eric Dumazet Cc: Taehee Yoo Cc: Cong Wang Cc: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/mcast.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 566f3b7b957e9..a777695389403 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2722,7 +2722,6 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Should stop work after group drop. or we will * start work again in mld_ifc_event() */ - synchronize_net(); mld_query_stop_work(idev); mld_report_stop_work(idev); -- GitLab From ef71a93eec0608d744b246431eb3f4723e0294c0 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 27 May 2022 12:53:54 +0800 Subject: [PATCH 0813/1418] arm64: dts: mt8183: kukui: Split out keyboard node and describe detachables [ Upstream commit 82492c4ef8f65f93cd4a35c4b52518935acbb2fa ] Kukui devices krane, kodana, and kakadu use detachable keyboards, which only have switches to be registered. Change the keyboard node's compatible of those boards to the newly introduced "google,cros-ec-keyb-switches", which won't include matrix properties. Signed-off-by: Hsin-Yi Wang Link: https://lore.kernel.org/r/20220527045353.2483042-1-hsinyi@chromium.org Signed-off-by: Matthias Brugger Stable-dep-of: 04bd6411f506 ("arm64: dts: mt8183: Move CrosEC base detection node to kukui-based DTs") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 ++ arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi | 6 ++++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi | 6 ++++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi | 6 ++++++ arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 - 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index dccf367c7ec6c..3d95625f1b0b4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -4,6 +4,8 @@ */ #include "mt8183-kukui.dtsi" +/* Must come after mt8183-kukui.dtsi to modify cros_ec */ +#include / { panel: panel { diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi index 50a0dd36b5fb3..a11adeb29b1f2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi @@ -372,6 +372,12 @@ }; }; +&cros_ec { + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_KAKADU"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi index 06f8c80bf5536..4864c39e53a4f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi @@ -339,6 +339,12 @@ }; }; +&cros_ec { + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_KODAMA"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi index a7b0cb3ff7b0a..d5f41c6c98814 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi @@ -343,6 +343,12 @@ }; }; +&cros_ec { + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "LE_Krane"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index a428a581c93a8..de610874a9125 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -999,5 +999,4 @@ }; }; -#include #include -- GitLab From 6606534538e92d0fba40ef806a5e938b5f6355e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Tue, 16 Jan 2024 18:38:34 -0300 Subject: [PATCH 0814/1418] arm64: dts: mt8183: Move CrosEC base detection node to kukui-based DTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 04bd6411f506357fd1faedc2b2156e7ef206aa9a ] The cbas node is used to describe base detection functionality in the ChromeOS EC, which is used for units that have a detachable keyboard and thus rely on this functionality to switch between tablet and laptop mode. Despite the original commit having added the cbas node to the mt8183-kukui.dtsi, not all machines that include it are detachables. In fact all machines that include from mt8183-kukui-jacuzzi.dtsi are either clamshells (ie normal laptops) or convertibles, meaning the keyboard can be flipped but not detached. The detection for the keyboard getting flipped is handled by the driver bound to the keyboard-controller node in the EC. Move the base detection node from the base kukui dtsi to the dtsis where all machines are detachables, and thus actually make use of the node. Fixes: 4fa8492d1e5b ("arm64: dts: mt8183: add cbas node under cros_ec") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240116-mt8183-kukui-cbas-remove-v3-1-055e21406e86@collabora.com Signed-off-by: Matthias Brugger Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi | 4 ++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi | 4 ++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi | 4 ++++ arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 4 ---- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi index a11adeb29b1f2..0d3c7b8162ff0 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi @@ -373,6 +373,10 @@ }; &cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + keyboard-controller { compatible = "google,cros-ec-keyb-switches"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi index 4864c39e53a4f..e73113cb51f53 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi @@ -340,6 +340,10 @@ }; &cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + keyboard-controller { compatible = "google,cros-ec-keyb-switches"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi index d5f41c6c98814..181da69d18f46 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi @@ -344,6 +344,10 @@ }; &cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + keyboard-controller { compatible = "google,cros-ec-keyb-switches"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index de610874a9125..1db97d94658b9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -896,10 +896,6 @@ google,usb-port-id = <0>; }; - cbas { - compatible = "google,cros-cbas"; - }; - typec { compatible = "google,cros-ec-typec"; #address-cells = <1>; -- GitLab From 5c77447aa44479aee694be4e099730643c29bcf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 1 Jan 2024 19:20:40 +0100 Subject: [PATCH 0815/1418] arm64: dts: mediatek: mt7986: add "#reset-cells" to infracfg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d993daff5962b2dd08f32a83bb1c0e5fa75732ea ] MT7986's Infrastructure System Configuration Controller includes reset controller. It can reset blocks as specified in the include/dt-bindings/reset/mt7986-resets.h . Add #reset-cells so it can be referenced properly. This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: infracfg@10001000: '#reset-cells' is a required property from schema $id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml# Fixes: 1f9986b258c2 ("arm64: dts: mediatek: add clock support for mt7986a") Cc: Sam Shih Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20240101182040.28538-2-zajec5@gmail.com Signed-off-by: Matthias Brugger Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index fc338bd497f51..108931e796465 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -110,6 +110,7 @@ compatible = "mediatek,mt7986-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; wed_pcie: wed-pcie@10003000 { -- GitLab From fedd55b8a5819c2ae086b0f765fd92b79cdb0406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 7 Feb 2024 15:08:42 -0500 Subject: [PATCH 0816/1418] arm64: dts: mediatek: mt8192-asurada: Remove CrosEC base detection node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b49cabe631b0a25aaf8fc2ba81b5b9ea6ff01b7 ] The commit adding the ChromeOS EC to the Asurada Devicetree mistakenly added a base detection node. While tablet mode detection is supported by CrosEC and used by Hayato, it is done through the cros-ec-keyb driver. The base detection node, which is handled by the hid-google-hammer driver, also provides tablet mode detection but by checking base attachment status on the CrosEC, which is not supported for Asurada. Hence, remove the unused CrosEC base detection node for Asurada. Fixes: eb188a2aaa82 ("arm64: dts: mediatek: asurada: Add ChromeOS EC") Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240207-mt8192-asurada-cbas-remove-v1-1-04cb65951975@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 50367da93cd79..c6080af1e4a30 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -819,10 +819,6 @@ #address-cells = <1>; #size-cells = <0>; - base_detection: cbas { - compatible = "google,cros-cbas"; - }; - cros_ec_pwm: pwm { compatible = "google,cros-ec-pwm"; #pwm-cells = <1>; -- GitLab From 4bc2befb93d181b0565f4cfb2ca26b5caf247952 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 28 Dec 2023 13:32:42 +0200 Subject: [PATCH 0817/1418] arm64: dts: mediatek: mt8192: fix vencoder clock name [ Upstream commit 76aac0f2a46847ed4a7a4fdd848dd66023c19ad1 ] Clock name should be `venc_sel` as per binding. Fix the warning message : arch/arm64/boot/dts/mediatek/mt8192-asurada-hayato-r1.dtb: vcodec@17020000: clock-names:0: 'venc_sel' was expected from schema $id: http://devicetree.org/schemas/media/mediatek,vcodec-encoder.yaml# Fixes: aa8f3711fc87 ("arm64: dts: mt8192: Add H264 venc device node") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231228113245.174706-4-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 2f40c6cc407c1..4ed8a0f187583 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1539,7 +1539,7 @@ mediatek,scp = <&scp>; power-domains = <&spm MT8192_POWER_DOMAIN_VENC>; clocks = <&vencsys CLK_VENC_SET1_VENC>; - clock-names = "venc-set1"; + clock-names = "venc_sel"; assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>; assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D4>; }; -- GitLab From 1e33bdd0239487ae5a414a5854b44e272a6cc875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 22 Jan 2024 14:23:57 +0100 Subject: [PATCH 0818/1418] arm64: dts: mediatek: mt7622: add missing "device_type" to memory nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 99d100e00144bc01b49a697f4bc4398f2f7e7ce4 ] This fixes: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: /: memory@40000000: 'device_type' is a required property from schema $id: http://devicetree.org/schemas/memory.yaml# arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dtb: /: memory@40000000: 'device_type' is a required property from schema $id: http://devicetree.org/schemas/memory.yaml# Signed-off-by: Rafał Miłecki Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240122132357.31264-1-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 1 + arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 2c35ed0734a47..b1ddc491d2936 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -74,6 +74,7 @@ memory@40000000 { reg = <0 0x40000000 0 0x40000000>; + device_type = "memory"; }; reg_1p8v: regulator-1p8v { diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index f9313b697ac12..527dcb279ba52 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -56,6 +56,7 @@ memory@40000000 { reg = <0 0x40000000 0 0x20000000>; + device_type = "memory"; }; reg_1p8v: regulator-1p8v { -- GitLab From 8bfc6b840a9542f8d5ba00a710a60e6387d272d5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 6 Feb 2024 23:01:02 -0800 Subject: [PATCH 0819/1418] bpf: Mark bpf_spin_{lock,unlock}() helpers with notrace correctly [ Upstream commit 178c54666f9c4d2f49f2ea661d0c11b52f0ed190 ] Currently tracing is supposed not to allow for bpf_spin_{lock,unlock}() helper calls. This is to prevent deadlock for the following cases: - there is a prog (prog-A) calling bpf_spin_{lock,unlock}(). - there is a tracing program (prog-B), e.g., fentry, attached to bpf_spin_lock() and/or bpf_spin_unlock(). - prog-B calls bpf_spin_{lock,unlock}(). For such a case, when prog-A calls bpf_spin_{lock,unlock}(), a deadlock will happen. The related source codes are below in kernel/bpf/helpers.c: notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) notrace is supposed to prevent fentry prog from attaching to bpf_spin_{lock,unlock}(). But actually this is not the case and fentry prog can successfully attached to bpf_spin_lock(). Siddharth Chintamaneni reported the issue in [1]. The following is the macro definition for above BPF_CALL_1: #define BPF_CALL_x(x, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) #define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) The notrace attribute is actually applied to the static always_inline function ____bpf_spin_{lock,unlock}(). The actual callback function bpf_spin_{lock,unlock}() is not marked with notrace, hence allowing fentry prog to attach to two helpers, and this may cause the above mentioned deadlock. Siddharth Chintamaneni actually has a reproducer in [2]. To fix the issue, a new macro NOTRACE_BPF_CALL_1 is introduced which will add notrace attribute to the original function instead of the hidden always_inline function and this fixed the problem. [1] https://lore.kernel.org/bpf/CAE5sdEigPnoGrzN8WU7Tx-h-iFuMZgW06qp0KHWtpvoXxf1OAQ@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CAE5sdEg6yUc_Jz50AnUXEEUh6O73yQ1Z6NV2srJnef0ZrQkZew@mail.gmail.com/ Fixes: d83525ca62cf ("bpf: introduce bpf_spin_lock") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240207070102.335167-1-yonghong.song@linux.dev Signed-off-by: Sasha Levin --- include/linux/filter.h | 21 ++++++++++++--------- kernel/bpf/helpers.c | 4 ++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed0..face590b24e17 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -495,24 +495,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 83f8f67e933df..758510b46d87b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -328,7 +328,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) __this_cpu_write(irqsave_flags, flags); } -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) { __bpf_spin_lock_irqsave(lock); return 0; @@ -350,7 +350,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) local_irq_restore(flags); } -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) { __bpf_spin_unlock_irqrestore(lock); return 0; -- GitLab From e2fed151d53d061495891e75681bf8eccb65b33e Mon Sep 17 00:00:00 2001 From: Craig Tatlor Date: Sat, 10 Feb 2024 17:45:40 +0100 Subject: [PATCH 0820/1418] ARM: dts: qcom: msm8974: correct qfprom node size [ Upstream commit 724c4bf0e4bf81dba77736afb93964c986c3c123 ] The qfprom actually is bigger than 0x1000, so adjust the reg. Note that the non-ECC-corrected qfprom can be found at 0xfc4b8000 (-0x4000). The current reg points to the ECC-corrected qfprom block which should have equivalent values at all offsets compared to the non-corrected version. [luca@z3ntu.xyz: extract to standalone patch and adjust for review comments] Fixes: c59ffb519357 ("arm: dts: msm8974: Add thermal zones, tsens and qfprom nodes") Signed-off-by: Craig Tatlor Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240210-msm8974-qfprom-v3-1-26c424160334@z3ntu.xyz Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-msm8974.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index c4b2e9ac24940..5ea45e486ed54 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1134,7 +1134,7 @@ qfprom: qfprom@fc4bc000 { compatible = "qcom,msm8974-qfprom", "qcom,qfprom"; - reg = <0xfc4bc000 0x1000>; + reg = <0xfc4bc000 0x2100>; #address-cells = <1>; #size-cells = <1>; tsens_calib: calib@d0 { -- GitLab From a9545af2a533739ffb64d6c9a6fec6f13e2b505f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Mon, 12 Feb 2024 13:57:37 +0100 Subject: [PATCH 0821/1418] wifi: wilc1000: prevent use-after-free on vif when cleaning up all interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb5942b77c05d54310a0420cac12935e9b6aa21c ] wilc_netdev_cleanup currently triggers a KASAN warning, which can be observed on interface registration error path, or simply by removing the module/unbinding device from driver: echo spi0.1 > /sys/bus/spi/drivers/wilc1000_spi/unbind ================================================================== BUG: KASAN: slab-use-after-free in wilc_netdev_cleanup+0x508/0x5cc Read of size 4 at addr c54d1ce8 by task sh/86 CPU: 0 PID: 86 Comm: sh Not tainted 6.8.0-rc1+ #117 Hardware name: Atmel SAMA5 unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x34/0x58 dump_stack_lvl from print_report+0x154/0x500 print_report from kasan_report+0xac/0xd8 kasan_report from wilc_netdev_cleanup+0x508/0x5cc wilc_netdev_cleanup from wilc_bus_remove+0xc8/0xec wilc_bus_remove from spi_remove+0x8c/0xac spi_remove from device_release_driver_internal+0x434/0x5f8 device_release_driver_internal from unbind_store+0xbc/0x108 unbind_store from kernfs_fop_write_iter+0x398/0x584 kernfs_fop_write_iter from vfs_write+0x728/0xf88 vfs_write from ksys_write+0x110/0x1e4 ksys_write from ret_fast_syscall+0x0/0x1c [...] Allocated by task 1: kasan_save_track+0x30/0x5c __kasan_kmalloc+0x8c/0x94 __kmalloc_node+0x1cc/0x3e4 kvmalloc_node+0x48/0x180 alloc_netdev_mqs+0x68/0x11dc alloc_etherdev_mqs+0x28/0x34 wilc_netdev_ifc_init+0x34/0x8ec wilc_cfg80211_init+0x690/0x910 wilc_bus_probe+0xe0/0x4a0 spi_probe+0x158/0x1b0 really_probe+0x270/0xdf4 __driver_probe_device+0x1dc/0x580 driver_probe_device+0x60/0x140 __driver_attach+0x228/0x5d4 bus_for_each_dev+0x13c/0x1a8 bus_add_driver+0x2a0/0x608 driver_register+0x24c/0x578 do_one_initcall+0x180/0x310 kernel_init_freeable+0x424/0x484 kernel_init+0x20/0x148 ret_from_fork+0x14/0x28 Freed by task 86: kasan_save_track+0x30/0x5c kasan_save_free_info+0x38/0x58 __kasan_slab_free+0xe4/0x140 kfree+0xb0/0x238 device_release+0xc0/0x2a8 kobject_put+0x1d4/0x46c netdev_run_todo+0x8fc/0x11d0 wilc_netdev_cleanup+0x1e4/0x5cc wilc_bus_remove+0xc8/0xec spi_remove+0x8c/0xac device_release_driver_internal+0x434/0x5f8 unbind_store+0xbc/0x108 kernfs_fop_write_iter+0x398/0x584 vfs_write+0x728/0xf88 ksys_write+0x110/0x1e4 ret_fast_syscall+0x0/0x1c [...] David Mosberger-Tan initial investigation [1] showed that this use-after-free is due to netdevice unregistration during vif list traversal. When unregistering a net device, since the needs_free_netdev has been set to true during registration, the netdevice object is also freed, and as a consequence, the corresponding vif object too, since it is attached to it as private netdevice data. The next occurrence of the loop then tries to access freed vif pointer to the list to move forward in the list. Fix this use-after-free thanks to two mechanisms: - navigate in the list with list_for_each_entry_safe, which allows to safely modify the list as we go through each element. For each element, remove it from the list with list_del_rcu - make sure to wait for RCU grace period end after each vif removal to make sure it is safe to free the corresponding vif too (through unregister_netdev) Since we are in a RCU "modifier" path (not a "reader" path), and because such path is expected not to be concurrent to any other modifier (we are using the vif_mutex lock), we do not need to use RCU list API, that's why we can benefit from list_for_each_entry_safe. [1] https://lore.kernel.org/linux-wireless/ab077dbe58b1ea5de0a3b2ca21f275a07af967d2.camel@egauge.net/ Fixes: 8399918f3056 ("staging: wilc1000: use RCU list to maintain vif interfaces list") Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240212-wilc_rework_deinit-v1-1-9203ae56c27f@bootlin.com Signed-off-by: Sasha Levin --- .../net/wireless/microchip/wilc1000/netdev.c | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index 0e6eeeed2e086..b714da48eaa17 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -878,8 +878,7 @@ static const struct net_device_ops wilc_netdev_ops = { void wilc_netdev_cleanup(struct wilc *wilc) { - struct wilc_vif *vif; - int srcu_idx, ifc_cnt = 0; + struct wilc_vif *vif, *vif_tmp; if (!wilc) return; @@ -889,32 +888,19 @@ void wilc_netdev_cleanup(struct wilc *wilc) wilc->firmware = NULL; } - srcu_idx = srcu_read_lock(&wilc->srcu); - list_for_each_entry_rcu(vif, &wilc->vif_list, list) { + list_for_each_entry_safe(vif, vif_tmp, &wilc->vif_list, list) { + mutex_lock(&wilc->vif_mutex); + list_del_rcu(&vif->list); + wilc->vif_num--; + mutex_unlock(&wilc->vif_mutex); + synchronize_srcu(&wilc->srcu); if (vif->ndev) unregister_netdev(vif->ndev); } - srcu_read_unlock(&wilc->srcu, srcu_idx); wilc_wfi_deinit_mon_interface(wilc, false); destroy_workqueue(wilc->hif_workqueue); - while (ifc_cnt < WILC_NUM_CONCURRENT_IFC) { - mutex_lock(&wilc->vif_mutex); - if (wilc->vif_num <= 0) { - mutex_unlock(&wilc->vif_mutex); - break; - } - vif = wilc_get_wl_to_vif(wilc); - if (!IS_ERR(vif)) - list_del_rcu(&vif->list); - - wilc->vif_num--; - mutex_unlock(&wilc->vif_mutex); - synchronize_srcu(&wilc->srcu); - ifc_cnt++; - } - wilc_wlan_cfg_deinit(wilc); wlan_deinit_locks(wilc); wiphy_unregister(wilc->wiphy); -- GitLab From fad9bcd4d754cc689c19dc04d2c44b82c1a5d6c8 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 13 Feb 2024 01:41:58 +0100 Subject: [PATCH 0822/1418] ACPI: processor_idle: Fix memory leak in acpi_processor_power_exit() [ Upstream commit e18afcb7b2a12b635ac10081f943fcf84ddacc51 ] After unregistering the CPU idle device, the memory associated with it is not freed, leading to a memory leak: unreferenced object 0xffff896282f6c000 (size 1024): comm "swapper/0", pid 1, jiffies 4294893170 hex dump (first 32 bytes): 00 00 00 00 0b 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 8836a742): [] kmalloc_trace+0x29d/0x340 [] acpi_processor_power_init+0xf3/0x1c0 [] __acpi_processor_start+0xd3/0xf0 [] acpi_processor_start+0x2c/0x50 [] really_probe+0xe2/0x480 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1f/0x90 [] __driver_attach+0xce/0x1c0 [] bus_for_each_dev+0x70/0xc0 [] bus_add_driver+0x112/0x210 [] driver_register+0x55/0x100 [] acpi_processor_driver_init+0x3b/0xc0 [] do_one_initcall+0x41/0x300 [] kernel_init_freeable+0x320/0x470 [] kernel_init+0x16/0x1b0 [] ret_from_fork+0x2d/0x50 Fix this by freeing the CPU idle device after unregistering it. Fixes: 3d339dcbb56d ("cpuidle / ACPI : move cpuidle_device field out of the acpi_processor_power structure") Signed-off-by: Armin Wolf Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/processor_idle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index fc5b5b2c9e819..6f613eef28879 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1431,6 +1431,8 @@ int acpi_processor_power_exit(struct acpi_processor *pr) acpi_processor_registered--; if (acpi_processor_registered == 0) cpuidle_unregister_driver(&acpi_idle_driver); + + kfree(dev); } pr->flags.power_setup_done = 0; -- GitLab From 031d2acc4217dc953769f2c324787b39a8aebbaa Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 16 Feb 2024 10:02:37 +0000 Subject: [PATCH 0823/1418] bus: tegra-aconnect: Update dependency to ARCH_TEGRA [ Upstream commit 4acd21a45c1446277e2abaece97d7fa7c2e692a9 ] Update the architecture dependency to be the generic Tegra because the driver works on the four latest Tegra generations not just Tegra210, if you build a kernel with a specific ARCH_TEGRA_xxx_SOC option that excludes Tegra210 you don't get this driver. Fixes: 46a88534afb59 ("bus: Add support for Tegra ACONNECT") Signed-off-by: Peter Robinson Cc: Jon Hunter Cc: Thierry Reding Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/bus/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 7bfe998f3514a..bdc7633905504 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -186,11 +186,12 @@ config SUNXI_RSB config TEGRA_ACONNECT tristate "Tegra ACONNECT Bus Driver" - depends on ARCH_TEGRA_210_SOC + depends on ARCH_TEGRA depends on OF && PM help Driver for the Tegra ACONNECT bus which is used to interface with - the devices inside the Audio Processing Engine (APE) for Tegra210. + the devices inside the Audio Processing Engine (APE) for + Tegra210 and later. config TEGRA_GMI tristate "Tegra Generic Memory Interface bus driver" -- GitLab From 386c2487754ffd6f7db0f8bee2c5dc4019385443 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 22 Jan 2024 17:34:00 -0600 Subject: [PATCH 0824/1418] iommu/amd: Mark interrupt as managed [ Upstream commit 0feda94c868d396fac3b3cb14089d2d989a07c72 ] On many systems that have an AMD IOMMU the following sequence of warnings is observed during bootup. ``` pci 0000:00:00.2 can't derive routing for PCI INT A pci 0000:00:00.2: PCI INT A: not connected ``` This series of events happens because of the IOMMU initialization sequence order and the lack of _PRT entries for the IOMMU. During initialization the IOMMU driver first enables the PCI device using pci_enable_device(). This will call acpi_pci_irq_enable() which will check if the interrupt is declared in a PCI routing table (_PRT) entry. According to the PCI spec [1] these routing entries are only required under PCI root bridges: The _PRT object is required under all PCI root bridges The IOMMU is directly connected to the root complex, so there is no parent bridge to look for a _PRT entry. The first warning is emitted since no entry could be found in the hierarchy. The second warning is then emitted because the interrupt hasn't yet been configured to any value. The pin was configured in pci_read_irq() but the byte in PCI_INTERRUPT_LINE return 0xff which means "Unknown". After that sequence of events pci_enable_msi() is called and this will allocate an interrupt. That is both of these warnings are totally harmless because the IOMMU uses MSI for interrupts. To avoid even trying to probe for a _PRT entry mark the IOMMU as IRQ managed. This avoids both warnings. Link: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/06_Device_Configuration/Device_Configuration.html?highlight=_prt#prt-pci-routing-table [1] Signed-off-by: Mario Limonciello Fixes: cffe0a2b5a34 ("x86, irq: Keep balance of IOAPIC pin reference count") Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240122233400.1802-1-mario.limonciello@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f6e64c9858021..cc94ac6662339 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2047,6 +2047,9 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) /* Prevent binding other PCI device drivers to IOMMU devices */ iommu->dev->match_driver = false; + /* ACPI _PRT won't have an IRQ for IOMMU */ + iommu->dev->irq_managed = 1; + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); -- GitLab From 093cec79f0bbbe59e2ac14e37709d1d094351954 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:05:37 +0100 Subject: [PATCH 0825/1418] wifi: brcmsmac: avoid function pointer casts [ Upstream commit e1ea6db35fc3ba5ff063f097385e9f7a88c25356 ] An old cleanup went a little too far and causes a warning with clang-16 and higher as it breaks control flow integrity (KCFI) rules: drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c:64:34: error: cast from 'void (*)(struct brcms_phy *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 64 | brcms_init_timer(physhim->wl, (void (*)(void *))fn, | ^~~~~~~~~~~~~~~~~~~~ Change this one instance back to passing a void pointer so it can be used with the timer callback interface. Fixes: d89a4c80601d ("staging: brcm80211: removed void * from softmac phy") Signed-off-by: Arnd Bergmann Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240213100548.457854-1-arnd@kernel.org Signed-off-by: Sasha Levin --- .../net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c | 3 ++- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c | 5 ++--- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c index ccc621b8ed9f2..4a1fe982a948e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c @@ -383,8 +383,9 @@ struct shared_phy *wlc_phy_shared_attach(struct shared_phy_params *shp) return sh; } -static void wlc_phy_timercb_phycal(struct brcms_phy *pi) +static void wlc_phy_timercb_phycal(void *ptr) { + struct brcms_phy *pi = ptr; uint delay = 5; if (PHY_PERICAL_MPHASE_PENDING(pi)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c index a0de5db0cd646..b723817915365 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c @@ -57,12 +57,11 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim) } struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim, - void (*fn)(struct brcms_phy *pi), + void (*fn)(void *pi), void *arg, const char *name) { return (struct wlapi_timer *) - brcms_init_timer(physhim->wl, (void (*)(void *))fn, - arg, name); + brcms_init_timer(physhim->wl, fn, arg, name); } void wlapi_free_timer(struct wlapi_timer *t) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h index dd8774717adee..27d0934e600ed 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h @@ -131,7 +131,7 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim); /* PHY to WL utility functions */ struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim, - void (*fn)(struct brcms_phy *pi), + void (*fn)(void *pi), void *arg, const char *name); void wlapi_free_timer(struct wlapi_timer *t); void wlapi_add_timer(struct wlapi_timer *t, uint ms, int periodic); -- GitLab From f95febbffe98601b748eda9eae3eec082b1c39e4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 8 Jan 2024 14:12:15 +0100 Subject: [PATCH 0826/1418] arm64: dts: qcom: sdm845-db845c: correct PCIe wake-gpios [ Upstream commit 584a327c5cffc36369b2a8953d9448826240f1ac ] Bindings allow a "wake", not "enable", GPIO. Schematics also use WAKE name for the pin: sdm845-db845c.dtb: pcie@1c00000: Unevaluated properties are not allowed ('enable-gpio' was unexpected) Fixes: 4a657c264b78 ("arm64: dts: qcom: db845c: Enable PCIe controllers") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240108131216.53867-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 135ff4368c4a6..5c04c91b0ee2b 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -532,7 +532,7 @@ &pcie0 { status = "okay"; perst-gpios = <&tlmm 35 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 134 GPIO_ACTIVE_HIGH>; + wake-gpios = <&tlmm 134 GPIO_ACTIVE_HIGH>; vddpe-3v3-supply = <&pcie0_3p3v_dual>; -- GitLab From 9c23056893a4e359146c392f70808dc4362fa299 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Nov 2023 17:42:26 +0100 Subject: [PATCH 0827/1418] arm64: dts: qcom: sm8150: use 'gpios' suffix for PCI GPIOs [ Upstream commit af6f6778d34cb40e60368e288767f674cc0c5f60 ] Linux handles both versions, but bindings expect GPIO properties to have 'gpios' suffix instead of 'gpio': sa8155p-adp.dtb: pci@1c00000: Unevaluated properties are not allowed ('perst-gpio' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231111164229.63803-3-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 7c38989d0f7a ("arm64: dts: qcom: sm8150: correct PCIe wake-gpios") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index eb1a9369926d2..b829a9ebc5670 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1822,7 +1822,7 @@ phys = <&pcie0_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 35 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; enable-gpio = <&tlmm 37 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; @@ -1925,7 +1925,7 @@ phys = <&pcie1_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 102 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 102 GPIO_ACTIVE_HIGH>; enable-gpio = <&tlmm 104 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; -- GitLab From 88611c1fdca4e9dc85c80a94863b69363147f9cb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 8 Jan 2024 14:12:16 +0100 Subject: [PATCH 0828/1418] arm64: dts: qcom: sm8150: correct PCIe wake-gpios [ Upstream commit 7c38989d0f7a35c83e7c4781271d42662903fa8d ] Bindings allow a "wake", not "enable", GPIO. Schematics also use WAKE name for the pin: sa8155p-adp.dtb: pcie@1c00000: Unevaluated properties are not allowed ('enable-gpio' was unexpected) Fixes: a1c86c680533 ("arm64: dts: qcom: sm8150: Add PCIe nodes") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240108131216.53867-2-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index b829a9ebc5670..9dccecd9fcaef 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1823,7 +1823,7 @@ phy-names = "pciephy"; perst-gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; - enable-gpio = <&tlmm 37 GPIO_ACTIVE_HIGH>; + wake-gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie0_default_state>; -- GitLab From 2b718bb18f5fe24ada8311ad67e89141a0c062b0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 13 Feb 2024 23:39:47 +0100 Subject: [PATCH 0829/1418] powercap: dtpm_cpu: Fix error check against freq_qos_add_request() [ Upstream commit b50155cb0d609437236c88201206267835c6f965 ] The caller of the function freq_qos_add_request() checks again a non zero value but freq_qos_add_request() can return '1' if the request already exists. Therefore, the setup function fails while the QoS request actually did not failed. Fix that by changing the check against a negative value like all the other callers of the function. Fixes: 0e8f68d7f0485 ("Add CPU energy model based support") Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/dtpm_cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 9193c3b8edebe..ae7ee611978ba 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -219,7 +219,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) ret = freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, pd->table[pd->nr_perf_states - 1].frequency); - if (ret) + if (ret < 0) goto out_dtpm_unregister; cpufreq_cpu_put(policy); -- GitLab From 4c51575705d2c2bc7d04be26079e204ee5be6f23 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Feb 2024 17:31:04 -0500 Subject: [PATCH 0830/1418] net: ena: Remove ena_select_queue [ Upstream commit 78e886ba2b549945ecada055ee0765f0ded5707a ] Avoid the following warnings by removing the ena_select_queue() function and rely on the net core to do the queue selection, The issue happen when an skb received from an interface with more queues than ena is forwarded to the ena interface. [ 1176.159959] eth0 selects TX queue 11, but real number of TX queues is 8 [ 1176.863976] eth0 selects TX queue 14, but real number of TX queues is 8 [ 1180.767877] eth0 selects TX queue 14, but real number of TX queues is 8 [ 1188.703742] eth0 selects TX queue 14, but real number of TX queues is 8 Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Kamal Heib Reviewed-by: Jacob Keller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 044b8afde69a0..9e82e7b9c3b72 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3174,22 +3174,6 @@ error_drop_packet: return NETDEV_TX_OK; } -static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - u16 qid; - /* we suspect that this is good for in--kernel network services that - * want to loop incoming skb rx to tx in normal user generated traffic, - * most probably we will not get to this - */ - if (skb_rx_queue_recorded(skb)) - qid = skb_get_rx_queue(skb); - else - qid = netdev_pick_tx(dev, skb, NULL); - - return qid; -} - static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -3359,7 +3343,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_open = ena_open, .ndo_stop = ena_close, .ndo_start_xmit = ena_start_xmit, - .ndo_select_queue = ena_select_queue, .ndo_get_stats64 = ena_get_stats64, .ndo_tx_timeout = ena_tx_timeout, .ndo_change_mtu = ena_change_mtu, -- GitLab From c20211d3df54e410f6ce097959174132594a6add Mon Sep 17 00:00:00 2001 From: Hsin-Te Yuan Date: Wed, 24 Jan 2024 07:51:57 +0000 Subject: [PATCH 0831/1418] arm64: dts: mt8195-cherry-tomato: change watchdog reset boot flow [ Upstream commit ef569d5db50e7edd709e482157769a5b3c367e22 ] The external output reset signal was originally disabled and sent from firmware. However, an unfixed bug in the firmware on tomato prevents the signal from being sent, causing the device to fail to boot. To fix this, enable external output reset signal to allow the device to reboot normally. Fixes: 5eb2e303ec6b ("arm64: dts: mediatek: Introduce MT8195 Cherry platform's Tomato") Signed-off-by: Hsin-Te Yuan Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240124-send-upstream-v3-1-5097c9862a73@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts | 4 ++++ arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts | 4 ++++ arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts index 3348ba69ff6cf..d86d193e5a75e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts @@ -13,3 +13,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts index 4669e9d917f8c..5356f53308e24 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts @@ -33,3 +33,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts index 5021edd02f7c1..fca3606cb951e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts @@ -34,3 +34,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; -- GitLab From 0d276d9f335f41d6524258d58c0c0241ef9a83a4 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 26 Jan 2024 12:23:25 +0000 Subject: [PATCH 0832/1418] firmware: arm_scmi: Fix double free in SMC transport cleanup path [ Upstream commit f1d71576d2c9ec8fdb822173fa7f3de79475e9bd ] When the generic SCMI code tears down a channel, it calls the chan_free callback function, defined by each transport. Since multiple protocols might share the same transport_info member, chan_free() might want to clean up the same member multiple times within the given SCMI transport implementation. In this case, it is SMC transport. This will lead to a NULL pointer dereference at the second time: | scmi_protocol scmi_dev.1: Enabled polling mode TX channel - prot_id:16 | arm-scmi firmware:scmi: SCMI Notifications - Core Enabled. | arm-scmi firmware:scmi: unable to communicate with SCMI | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 | Mem abort info: | ESR = 0x0000000096000004 | EC = 0x25: DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | FSC = 0x04: level 0 translation fault | Data abort info: | ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 | CM = 0, WnR = 0, TnD = 0, TagAccess = 0 | GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 | user pgtable: 4k pages, 48-bit VAs, pgdp=0000000881ef8000 | [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 | Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP | Modules linked in: | CPU: 4 PID: 1 Comm: swapper/0 Not tainted 6.7.0-rc2-00124-g455ef3d016c9-dirty #793 | Hardware name: FVP Base RevC (DT) | pstate: 61400009 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) | pc : smc_chan_free+0x3c/0x6c | lr : smc_chan_free+0x3c/0x6c | Call trace: | smc_chan_free+0x3c/0x6c | idr_for_each+0x68/0xf8 | scmi_cleanup_channels.isra.0+0x2c/0x58 | scmi_probe+0x434/0x734 | platform_probe+0x68/0xd8 | really_probe+0x110/0x27c | __driver_probe_device+0x78/0x12c | driver_probe_device+0x3c/0x118 | __driver_attach+0x74/0x128 | bus_for_each_dev+0x78/0xe0 | driver_attach+0x24/0x30 | bus_add_driver+0xe4/0x1e8 | driver_register+0x60/0x128 | __platform_driver_register+0x28/0x34 | scmi_driver_init+0x84/0xc0 | do_one_initcall+0x78/0x33c | kernel_init_freeable+0x2b8/0x51c | kernel_init+0x24/0x130 | ret_from_fork+0x10/0x20 | Code: f0004701 910a0021 aa1403e5 97b91c70 (b9400280) | ---[ end trace 0000000000000000 ]--- Simply check for the struct pointer being NULL before trying to access its members, to avoid this situation. This was found when a transport doesn't really work (for instance no SMC service), the probe routines then tries to clean up, and triggers a crash. Signed-off-by: Andre Przywara Fixes: 1dc6558062da ("firmware: arm_scmi: Add smc/hvc transport") Reviewed-by: Cristian Marussi Link: https://lore.kernel.org/r/20240126122325.2039669-1-andre.przywara@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/smc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c index ac0bd51ef16a2..42ea308a2c1d5 100644 --- a/drivers/firmware/arm_scmi/smc.c +++ b/drivers/firmware/arm_scmi/smc.c @@ -171,6 +171,13 @@ static int smc_chan_free(int id, void *p, void *data) struct scmi_chan_info *cinfo = p; struct scmi_smc *scmi_info = cinfo->transport_info; + /* + * Different protocols might share the same chan info, so a previous + * smc_chan_free call might have already freed the structure. + */ + if (!scmi_info) + return 0; + /* Ignore any possible further reception on the IRQ path */ if (scmi_info->irq > 0) free_irq(scmi_info->irq, scmi_info); -- GitLab From 3936e0f81ac4c042e461a35ea8658bf5dbd8b9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Sat, 17 Feb 2024 14:22:41 +0100 Subject: [PATCH 0833/1418] wifi: wilc1000: revert reset line logic flip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f3ec643947634bed41b97bd56b248f7c78498eab ] This reverts commit fcf690b0b47494df51d214db5c5a714a400b0257. When using a wilc1000 chip over a spi bus, users can optionally define a reset gpio and a chip enable gpio. The reset line of wilc1000 is active low, so to hold the chip in reset, a low (physical) value must be applied. The corresponding device tree binding documentation was introduced by commit f31ee3c0a555 ("wilc1000: Document enable-gpios and reset-gpios properties") and correctly indicates that the reset line is an active-low signal. The corresponding driver part, brought by commit ec031ac4792c ("wilc1000: Add reset/enable GPIO support to SPI driver") was applying the correct logic. But commit fcf690b0b474 ("wifi: wilc1000: use correct sequence of RESET for chip Power-UP/Down") eventually flipped this logic and started misusing the gpiod APIs, applying an inverted logic when powering up/down the chip (for example, setting the reset line to a logic "1" during power up, which in fact asserts the reset line when device tree describes the reset line as GPIO_ACTIVE_LOW). As a consequence, any platform currently using the driver in SPI mode must use a faulty reset line description in device tree, or else chip will be maintained in reset and will not even allow to bring up the chip. Fix reset line usage by inverting back the gpiod APIs usage, setting the reset line to the logic value "0" when powering the chip, and the logic value "1" when powering off the chip. Fixes: fcf690b0b474 ("wifi: wilc1000: use correct sequence of RESET for chip Power-UP/Down") Signed-off-by: Alexis Lothoré Acked-by: Conor Dooley Acked-by: Ajay Singh Signed-off-by: Kalle Valo Link: https://msgid.link/20240217-wilc_1000_reset_line-v2-1-b216f433d7d5@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/spi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index b0fc5e68feeca..5877e2c1fa0fc 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -191,11 +191,11 @@ static void wilc_wlan_power(struct wilc *wilc, bool on) /* assert ENABLE: */ gpiod_set_value(gpios->enable, 1); mdelay(5); - /* assert RESET: */ - gpiod_set_value(gpios->reset, 1); - } else { /* deassert RESET: */ gpiod_set_value(gpios->reset, 0); + } else { + /* assert RESET: */ + gpiod_set_value(gpios->reset, 1); /* deassert ENABLE: */ gpiod_set_value(gpios->enable, 0); } -- GitLab From 5425ac2428b34b6ca483e81aeee7ae93832e027b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Aug 2023 17:03:04 +0200 Subject: [PATCH 0834/1418] ARM: dts: arm: realview: Fix development chip ROM compatible value [ Upstream commit 3baa4c5143d65ebab2de0d99a395e5f4f1f46608 ] When the development chip ROM was added, the "direct-mapped" compatible value was already obsolete. In addition, the device node lacked the accompanying "probe-type" property, causing the old physmap_of_core driver to fall back to trying all available probe types. Unfortunately this fallback was lost when the DT and pdata cases were merged. Fix this by using the modern "mtd-rom" compatible value instead. Fixes: 5c3f5edbe0a1dff3 ("ARM: realview: add flash devices to the PB1176 DTS") Fixes: 642b1e8dbed7bbbf ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/arm-realview-pb1176.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index efed325af88d2..d99bac02232b3 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -451,7 +451,7 @@ /* Direct-mapped development chip ROM */ pb1176_rom@10200000 { - compatible = "direct-mapped"; + compatible = "mtd-rom"; reg = <0x10200000 0x4000>; bank-width = <1>; }; -- GitLab From 96132cc2e159ccaa0619f11975016dc7d7694572 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 25 Oct 2022 23:06:29 +0100 Subject: [PATCH 0835/1418] arm64: dts: renesas: r9a07g043: Split out RZ/G2UL SoC specific parts [ Upstream commit b9a0be2054964026aa58966ce9724b672f210835 ] Move RZ/G2UL SoC specific parts to r9a07g043u.dtsi so that r9a07g043.dtsi can be shared with RZ/Five (RISC-V SoC). Below are the changes due to which SoC specific parts are moved to r9a07g043u.dtsi: - RZ/G2UL has Cortex-A55 (ARM64) whereas RZ/Five has AX45MP (RISC-V), - RZ/G2UL has GICv3 as interrupt controller whereas RZ/Five has PLIC, - RZ/G2UL has interrupts for SYSC block whereas interrupts are missing for SYSC block on RZ/Five, - RZ/G2UL has armv8-timer whereas RZ/Five has riscv-timer, - RZ/G2UL has PSCI whereas RZ/Five have OpenSBI. Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20221025220629.79321-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: 14fe225dd5fc ("arm64: dts: renesas: rzg2l: Add missing interrupts to IRQC nodes") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043.dtsi | 54 +------------------ arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 60 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 53 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043.dtsi index a4738842f0646..7f88395ff7997 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* - * Device Tree Source for the RZ/G2UL SoC + * Device Tree Source for the RZ/Five and RZ/G2UL SoCs * * Copyright (C) 2022 Renesas Electronics Corp. */ @@ -68,36 +68,8 @@ }; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu0: cpu@0 { - compatible = "arm,cortex-a55"; - reg = <0>; - device_type = "cpu"; - #cooling-cells = <2>; - next-level-cache = <&L3_CA55>; - enable-method = "psci"; - clocks = <&cpg CPG_CORE R9A07G043_CLK_I>; - operating-points-v2 = <&cluster0_opp>; - }; - - L3_CA55: cache-controller-0 { - compatible = "cache"; - cache-unified; - cache-size = <0x40000>; - }; - }; - - psci { - compatible = "arm,psci-1.0", "arm,psci-0.2"; - method = "smc"; - }; - soc: soc { compatible = "simple-bus"; - interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; ranges; @@ -545,12 +517,6 @@ sysc: system-controller@11020000 { compatible = "renesas,r9a07g043-sysc"; reg = <0 0x11020000 0 0x10000>; - interrupts = , - , - , - ; - interrupt-names = "lpm_int", "ca55stbydone_int", - "cm33stbyr_int", "ca55_deny"; status = "disabled"; }; @@ -603,16 +569,6 @@ dma-channels = <16>; }; - gic: interrupt-controller@11900000 { - compatible = "arm,gic-v3"; - #interrupt-cells = <3>; - #address-cells = <0>; - interrupt-controller; - reg = <0x0 0x11900000 0 0x40000>, - <0x0 0x11940000 0 0x60000>; - interrupts = ; - }; - sdhi0: mmc@11c00000 { compatible = "renesas,sdhi-r9a07g043", "renesas,rcar-gen3-sdhi"; @@ -893,12 +849,4 @@ }; }; }; - - timer { - compatible = "arm,armv8-timer"; - interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>; - }; }; diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index 96f935bc2d4d1..b8bf06b512351 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -10,3 +10,63 @@ #define SOC_PERIPHERAL_IRQ(nr) GIC_SPI nr #include "r9a07g043.dtsi" + +/ { + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "arm,cortex-a55"; + reg = <0>; + device_type = "cpu"; + #cooling-cells = <2>; + next-level-cache = <&L3_CA55>; + enable-method = "psci"; + clocks = <&cpg CPG_CORE R9A07G043_CLK_I>; + operating-points-v2 = <&cluster0_opp>; + }; + + L3_CA55: cache-controller-0 { + compatible = "cache"; + cache-unified; + cache-size = <0x40000>; + }; + }; + + psci { + compatible = "arm,psci-1.0", "arm,psci-0.2"; + method = "smc"; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>; + }; +}; + +&soc { + interrupt-parent = <&gic>; + + gic: interrupt-controller@11900000 { + compatible = "arm,gic-v3"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x11900000 0 0x40000>, + <0x0 0x11940000 0 0x60000>; + interrupts = ; + }; +}; + +&sysc { + interrupts = , + , + , + ; + interrupt-names = "lpm_int", "ca55stbydone_int", + "cm33stbyr_int", "ca55_deny"; +}; -- GitLab From 7f1d9f8bde624d34c6af527e043b7e57630a43d4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 2 Jan 2023 22:18:13 +0000 Subject: [PATCH 0836/1418] arm64: dts: renesas: r9a07g043u: Add IRQC node [ Upstream commit 48ab6eddd8bbcf7e9c8ae27bf42d0b52a777aaba ] Add IRQC node to R9A07G043 (RZ/G2UL) SoC DTSI. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230102221815.273719-5-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: 14fe225dd5fc ("arm64: dts: renesas: rzg2l: Add missing interrupts to IRQC nodes") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 68 +++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index b8bf06b512351..a6e777aee02ee 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -51,6 +51,74 @@ &soc { interrupt-parent = <&gic>; + irqc: interrupt-controller@110a0000 { + compatible = "renesas,r9a07g043u-irqc", + "renesas,rzg2l-irqc"; + reg = <0 0x110a0000 0 0x10000>; + #interrupt-cells = <2>; + #address-cells = <0>; + interrupt-controller; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", + "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err"; + clocks = <&cpg CPG_MOD R9A07G043_IA55_CLK>, + <&cpg CPG_MOD R9A07G043_IA55_PCLK>; + clock-names = "clk", "pclk"; + power-domains = <&cpg>; + resets = <&cpg R9A07G043_IA55_RESETN>; + }; + gic: interrupt-controller@11900000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; -- GitLab From 76cfe86f2d20ce5e4b7d746ee1d140d10c3276e2 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 5 Feb 2024 14:44:20 +0000 Subject: [PATCH 0837/1418] arm64: dts: renesas: rzg2l: Add missing interrupts to IRQC nodes [ Upstream commit 14fe225dd5fcd5928583b0bcc34398a581f51602 ] The IRQC IP block supports Bus error and ECCRAM interrupts on RZ/G2L and alike SoC's (listed below). Update the IRQC nodes with the missing interrupts, and additionally, include the 'interrupt-names' properties in the IRQC nodes so that the driver can parse interrupts by name. - R9A07G043U - RZ/G2UL - R9A07G044L/R9A07G044LC - RZ/{G2L,G2LC} - R9A07G054 - RZ/V2L Fixes: 5edc51af5b30 ("arm64: dts: renesas: r9a07g044: Add IRQC node") Fixes: 48ab6eddd8bb ("arm64: dts: renesas: r9a07g043u: Add IRQC node") Fixes: 379478ab09e0 ("arm64: dts: renesas: r9a07g054: Add IRQC node") Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240205144421.51195-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 12 +++++++++-- arch/arm64/boot/dts/renesas/r9a07g044.dtsi | 22 ++++++++++++++++++++- arch/arm64/boot/dts/renesas/r9a07g054.dtsi | 22 ++++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index a6e777aee02ee..011d4c88f4ed9 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -99,7 +99,13 @@ , , , - ; + , + , + , + , + , + , + ; interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", "irq4", "irq5", "irq6", "irq7", @@ -111,7 +117,9 @@ "tint20", "tint21", "tint22", "tint23", "tint24", "tint25", "tint26", "tint27", "tint28", "tint29", "tint30", "tint31", - "bus-err"; + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G043_IA55_CLK>, <&cpg CPG_MOD R9A07G043_IA55_PCLK>; clock-names = "clk", "pclk"; diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi index 7dbf6a6292f49..d26488b5a82df 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi @@ -698,7 +698,27 @@ , , , - ; + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, <&cpg CPG_MOD R9A07G044_IA55_PCLK>; clock-names = "clk", "pclk"; diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi index e000510b90a42..b3d37ca942ee3 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi @@ -704,7 +704,27 @@ , , , - ; + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G054_IA55_CLK>, <&cpg CPG_MOD R9A07G054_IA55_PCLK>; clock-names = "clk", "pclk"; -- GitLab From efab55e16c55c637f74ff58dcfddd18e0b3b56b6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Oct 2022 12:03:52 +0200 Subject: [PATCH 0838/1418] arm64: dts: renesas: r8a779a0: Update to R-Car Gen4 compatible values [ Upstream commit a1ca409cc050166a9e8ed183c1d4192f511cf6a2 ] Despite the name, R-Car V3U is the first member of the R-Car Gen4 family. Hence update the compatible properties in various device nodes to include family-specific compatible values for R-Car Gen4 instead of R-Car Gen3: - EtherAVB, - MSIOF. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/387168aef20d399d4f4318f4ecab9c3b016fd6f2.1666605756.git.geert+renesas@glider.be Stable-dep-of: 0c51912331f8 ("arm64: dts: renesas: r8a779a0: Correct avb[01] reg sizes") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a779a0.dtsi | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi index ed9400f903c9e..41fbb9998cf82 100644 --- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi @@ -656,7 +656,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6800000 0 0x800>; interrupts = , , @@ -704,7 +704,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6810000 0 0x800>; interrupts = , , @@ -752,7 +752,7 @@ avb2: ethernet@e6820000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6820000 0 0x1000>; interrupts = , , @@ -800,7 +800,7 @@ avb3: ethernet@e6830000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6830000 0 0x1000>; interrupts = , , @@ -848,7 +848,7 @@ avb4: ethernet@e6840000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6840000 0 0x1000>; interrupts = , , @@ -896,7 +896,7 @@ avb5: ethernet@e6850000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6850000 0 0x1000>; interrupts = , , @@ -1019,7 +1019,7 @@ msiof0: spi@e6e90000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6e90000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 618>; @@ -1034,7 +1034,7 @@ msiof1: spi@e6ea0000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6ea0000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 619>; @@ -1049,7 +1049,7 @@ msiof2: spi@e6c00000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c00000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 620>; @@ -1064,7 +1064,7 @@ msiof3: spi@e6c10000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c10000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 621>; @@ -1079,7 +1079,7 @@ msiof4: spi@e6c20000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c20000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 622>; @@ -1094,7 +1094,7 @@ msiof5: spi@e6c28000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c28000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 623>; -- GitLab From bea023d846d352e1fbfdd95337cd8bc2b75323fe Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 11 Feb 2024 15:21:30 +0100 Subject: [PATCH 0839/1418] arm64: dts: renesas: r8a779a0: Correct avb[01] reg sizes [ Upstream commit 0c51912331f8ba5ce5fb52f46e340945160672a3 ] All Ethernet AVB instances on R-Car V3U have registers related to UDP/IP support, but the declared register blocks for the first two instances are too small to cover them. Fix this by extending the register block sizes. Fixes: 5a633320f08b8c9b ("arm64: dts: renesas: r8a779a0: Add Ethernet-AVB support") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/ce6ce3c4b1495e02e7c1803fca810a7178a84500.1707660323.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a779a0.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi index 41fbb9998cf82..b677ef6705d94 100644 --- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi @@ -657,7 +657,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779a0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6800000 0 0x800>; + reg = <0 0xe6800000 0 0x1000>; interrupts = , , , @@ -705,7 +705,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779a0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6810000 0 0x800>; + reg = <0 0xe6810000 0 0x1000>; interrupts = , , , -- GitLab From 0820c84be6361af434ddba881d9b1a029165e5f8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 11 Feb 2024 15:21:31 +0100 Subject: [PATCH 0840/1418] arm64: dts: renesas: r8a779g0: Correct avb[01] reg sizes [ Upstream commit 7edbb5880dc3317a5eaec2166de71ff394598e6b ] All Ethernet AVB instances on R-Car V4H have registers related to UDP/IP support, but the declared register blocks for the first two instances are too small to cover them. Fix this by extending the register block sizes. Fixes: 848c82db56923a8b ("arm64: dts: renesas: r8a779g0: Add RAVB nodes") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/83437778614a7c96f4d8f1be98dffeee29bb4a0b.1707660323.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a779g0.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi index d58b18802cb01..868d1a3cbdf61 100644 --- a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi @@ -337,7 +337,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779g0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6800000 0 0x800>; + reg = <0 0xe6800000 0 0x1000>; interrupts = , , , @@ -384,7 +384,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779g0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6810000 0 0x800>; + reg = <0 0xe6810000 0 0x1000>; interrupts = , , , -- GitLab From e16c33dd9967b7f20987bf653acc4f605836127b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:54 +0800 Subject: [PATCH 0841/1418] net: mctp: copy skb ext data when fragmenting [ Upstream commit 1394c1dec1c619a46867ed32791a29695372bff8 ] If we're fragmenting on local output, the original packet may contain ext data for the MCTP flows. We'll want this in the resulting fragment skbs too. So, do a skb_ext_copy() in the fragmentation path, and implement the MCTP-specific parts of an ext copy operation. Fixes: 67737c457281 ("mctp: Pass flow data & flow release events to drivers") Reported-by: Jian Zhang Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/skbuff.c | 8 ++++++++ net/mctp/route.c | 3 +++ 2 files changed, 11 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d4bd10f8723df..e38a4c7449f62 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6500,6 +6500,14 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } +#endif +#ifdef CONFIG_MCTP_FLOWS + if (old_active & (1 << SKB_EXT_MCTP)) { + struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP); + + if (flow->key) + refcount_inc(&flow->key->refs); + } #endif __skb_ext_put(old); return new; diff --git a/net/mctp/route.c b/net/mctp/route.c index 0144d8ebdaefb..05ab4fddc82e9 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -843,6 +843,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* copy message payload */ skb_copy_bits(skb, pos, skb_transport_header(skb2), size); + /* we need to copy the extensions, for MCTP flow data */ + skb_ext_copy(skb2, skb); + /* do route */ rc = rt->output(rt, skb2); if (rc) -- GitLab From be52ee92ced6408addbdbb9aa930ee859acc1428 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 5 Dec 2023 10:26:15 -0800 Subject: [PATCH 0842/1418] pstore: inode: Convert mutex usage to guard(mutex) [ Upstream commit e2eeddefb046dbc771a6fa426f7f98fb25adfe68 ] Replace open-coded mutex handling with cleanup.h guard(mutex) and scoped_guard(mutex, ...). Cc: Guilherme G. Piccoli Cc: Tony Luck Cc: Link: https://lore.kernel.org/r/20231205182622.1329923-2-keescook@chromium.org Signed-off-by: Kees Cook Stable-dep-of: a43e0fc5e913 ("pstore: inode: Only d_invalidate() is needed") Signed-off-by: Sasha Levin --- fs/pstore/inode.c | 76 +++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index ffbadb8b3032d..05ddfb37b15e1 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -182,25 +182,21 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; struct pstore_record *record = p->record; - int rc = 0; if (!record->psi->erase) return -EPERM; /* Make sure we can't race while removing this file. */ - mutex_lock(&records_list_lock); - if (!list_empty(&p->list)) - list_del_init(&p->list); - else - rc = -ENOENT; - p->dentry = NULL; - mutex_unlock(&records_list_lock); - if (rc) - return rc; - - mutex_lock(&record->psi->read_mutex); - record->psi->erase(record); - mutex_unlock(&record->psi->read_mutex); + scoped_guard(mutex, &records_list_lock) { + if (!list_empty(&p->list)) + list_del_init(&p->list); + else + return -ENOENT; + p->dentry = NULL; + } + + scoped_guard(mutex, &record->psi->read_mutex) + record->psi->erase(record); return simple_unlink(dir, dentry); } @@ -292,19 +288,16 @@ static struct dentry *psinfo_lock_root(void) { struct dentry *root; - mutex_lock(&pstore_sb_lock); + guard(mutex)(&pstore_sb_lock); /* * Having no backend is fine -- no records appear. * Not being mounted is fine -- nothing to do. */ - if (!psinfo || !pstore_sb) { - mutex_unlock(&pstore_sb_lock); + if (!psinfo || !pstore_sb) return NULL; - } root = pstore_sb->s_root; inode_lock(d_inode(root)); - mutex_unlock(&pstore_sb_lock); return root; } @@ -319,19 +312,19 @@ int pstore_put_backend_records(struct pstore_info *psi) if (!root) return 0; - mutex_lock(&records_list_lock); - list_for_each_entry_safe(pos, tmp, &records_list, list) { - if (pos->record->psi == psi) { - list_del_init(&pos->list); - rc = simple_unlink(d_inode(root), pos->dentry); - if (WARN_ON(rc)) - break; - d_drop(pos->dentry); - dput(pos->dentry); - pos->dentry = NULL; + scoped_guard(mutex, &records_list_lock) { + list_for_each_entry_safe(pos, tmp, &records_list, list) { + if (pos->record->psi == psi) { + list_del_init(&pos->list); + rc = simple_unlink(d_inode(root), pos->dentry); + if (WARN_ON(rc)) + break; + d_drop(pos->dentry); + dput(pos->dentry); + pos->dentry = NULL; + } } } - mutex_unlock(&records_list_lock); inode_unlock(d_inode(root)); @@ -355,20 +348,20 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) if (WARN_ON(!inode_is_locked(d_inode(root)))) return -EINVAL; - rc = -EEXIST; + guard(mutex)(&records_list_lock); + /* Skip records that are already present in the filesystem. */ - mutex_lock(&records_list_lock); list_for_each_entry(pos, &records_list, list) { if (pos->record->type == record->type && pos->record->id == record->id && pos->record->psi == record->psi) - goto fail; + return -EEXIST; } rc = -ENOMEM; inode = pstore_get_inode(root->d_sb); if (!inode) - goto fail; + return -ENOMEM; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; scnprintf(name, sizeof(name), "%s-%s-%llu%s", @@ -395,7 +388,6 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) d_add(dentry, inode); list_add(&private->list, &records_list); - mutex_unlock(&records_list_lock); return 0; @@ -403,8 +395,6 @@ fail_private: free_pstore_private(private); fail_inode: iput(inode); -fail: - mutex_unlock(&records_list_lock); return rc; } @@ -450,9 +440,8 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) return -ENOMEM; - mutex_lock(&pstore_sb_lock); - pstore_sb = sb; - mutex_unlock(&pstore_sb_lock); + scoped_guard(mutex, &pstore_sb_lock) + pstore_sb = sb; pstore_get_records(0); @@ -467,17 +456,14 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type, static void pstore_kill_sb(struct super_block *sb) { - mutex_lock(&pstore_sb_lock); + guard(mutex)(&pstore_sb_lock); WARN_ON(pstore_sb && pstore_sb != sb); kill_litter_super(sb); pstore_sb = NULL; - mutex_lock(&records_list_lock); + guard(mutex)(&records_list_lock); INIT_LIST_HEAD(&records_list); - mutex_unlock(&records_list_lock); - - mutex_unlock(&pstore_sb_lock); } static struct file_system_type pstore_fs_type = { -- GitLab From db6e5e16f1ee9e3b01d2f71c7f0ba945f4bf0f4e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2024 09:48:46 -0800 Subject: [PATCH 0843/1418] pstore: inode: Only d_invalidate() is needed [ Upstream commit a43e0fc5e9134a46515de2f2f8d4100b74e50de3 ] Unloading a modular pstore backend with records in pstorefs would trigger the dput() double-drop warning: WARNING: CPU: 0 PID: 2569 at fs/dcache.c:762 dput.part.0+0x3f3/0x410 Using the combo of d_drop()/dput() (as mentioned in Documentation/filesystems/vfs.rst) isn't the right approach here, and leads to the reference counting problem seen above. Use d_invalidate() and update the code to not bother checking for error codes that can never happen. Suggested-by: Alexander Viro Fixes: 609e28bb139e ("pstore: Remove filesystem records when backend is unregistered") Signed-off-by: Kees Cook --- fs/pstore/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 05ddfb37b15e1..ea3f104371d62 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -306,7 +306,6 @@ int pstore_put_backend_records(struct pstore_info *psi) { struct pstore_private *pos, *tmp; struct dentry *root; - int rc = 0; root = psinfo_lock_root(); if (!root) @@ -316,11 +315,8 @@ int pstore_put_backend_records(struct pstore_info *psi) list_for_each_entry_safe(pos, tmp, &records_list, list) { if (pos->record->psi == psi) { list_del_init(&pos->list); - rc = simple_unlink(d_inode(root), pos->dentry); - if (WARN_ON(rc)) - break; - d_drop(pos->dentry); - dput(pos->dentry); + d_invalidate(pos->dentry); + simple_unlink(d_inode(root), pos->dentry); pos->dentry = NULL; } } @@ -328,7 +324,7 @@ int pstore_put_backend_records(struct pstore_info *psi) inode_unlock(d_inode(root)); - return rc; + return 0; } /* -- GitLab From 4478f7e5be243b49e6c7f4dd67cbdc9b8b8436f6 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:45 +0800 Subject: [PATCH 0844/1418] arm64: dts: allwinner: h6: Add RX DMA channel for SPDIF [ Upstream commit 7b59348c11f3355e284d77bbe3d33632ddadcfc2 ] The SPDIF hardware found on the H6 supports both transmit and receive functions. However it is missing the RX DMA channel. Add the SPDIF hardware block's RX DMA channel. Also remove the by-default pinmux, since the end device can choose to implement either or both functionalities. Fixes: f95b598df419 ("arm64: dts: allwinner: Add SPDIF node for Allwinner H6") Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20240127163247.384439-6-wens@kernel.org Signed-off-by: Jernej Skrabec Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 2 ++ arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi | 2 ++ arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi | 7 +++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 9ec49ac2f6fd5..381d58cea092d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -291,6 +291,8 @@ }; &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&spdif_tx_pin>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi index 4903d6358112d..855b7d43bc503 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi @@ -166,6 +166,8 @@ }; &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&spdif_tx_pin>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index ca1d287a0a01d..d11e5041bae9a 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -406,6 +406,7 @@ function = "spi1"; }; + /omit-if-no-ref/ spdif_tx_pin: spdif-tx-pin { pins = "PH7"; function = "spdif"; @@ -655,10 +656,8 @@ clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>; clock-names = "apb", "spdif"; resets = <&ccu RST_BUS_SPDIF>; - dmas = <&dma 2>; - dma-names = "tx"; - pinctrl-names = "default"; - pinctrl-0 = <&spdif_tx_pin>; + dmas = <&dma 2>, <&dma 2>; + dma-names = "rx", "tx"; status = "disabled"; }; -- GitLab From 23bb0006c9dbf737153956c2069088c3bb2563df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Tue, 14 Mar 2023 19:06:04 +0100 Subject: [PATCH 0845/1418] ARM: dts: imx6dl-yapp4: Move phy reset into switch node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7da7b84fee58c85a6075022023d31edea40e81a1 ] Drop the phy-reset-duration and phy-reset-gpios deprecated properties and move reset-gpios under the switch node. Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo Stable-dep-of: 023bd910d3ab ("ARM: dts: imx6dl-yapp4: Fix typo in the QCA switch register address") Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index aacbf317feea6..cb1972f8e8d27 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -106,8 +106,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; - phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; - phy-reset-duration = <20>; phy-supply = <&sw2_reg>; status = "okay"; @@ -131,6 +129,7 @@ switch@10 { compatible = "qca,qca8334"; reg = <10>; + reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; switch_ports: ports { #address-cells = <1>; -- GitLab From 28b43ec7e4c00645efec8f7d239d649dfd6f71e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 14 Feb 2024 10:03:27 +0100 Subject: [PATCH 0846/1418] ARM: dts: imx6dl-yapp4: Fix typo in the QCA switch register address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 023bd910d3ab735459f84b22bb99fb9e00bd9d76 ] This change does not have any functional effect. The switch works just fine without this patch as it has full access to all the addresses on the bus. This is simply a clean-up to set the node name address and reg address to the same value. Fixes: 15b43e497ffd ("ARM: dts: imx6dl-yapp4: Use correct pseudo PHY address for the switch") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index cb1972f8e8d27..a655b945bf2df 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -128,7 +128,7 @@ switch@10 { compatible = "qca,qca8334"; - reg = <10>; + reg = <0x10>; reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; switch_ports: ports { -- GitLab From 681ba22555dc538a5073dc1491db8fd016a2eb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 14 Feb 2024 10:03:28 +0100 Subject: [PATCH 0847/1418] ARM: dts: imx6dl-yapp4: Move the internal switch PHYs under the switch node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79978bff2e4b8e05ebdf5fc3ee6b794002393484 ] We identified that the PHYs actually do not work since commit 7da7b84fee58 ("ARM: dts: imx6dl-yapp4: Move phy reset into switch node") as a coincidence of several circumstances. The reset signal is kept asserted by a pull-down resistor on the board unless it is deasserted by GPIO from the SoC. This is to keep the switch dead until it is configured properly by the kernel and user space. Prior to the referenced commit the switch was reset by the FEC driver and the reset GPIO was actively deasserted. The mdio-bus was scanned and the attached switch and its PHYs were found and configured. With the referenced commit the switch is reset by the qca8k driver. Because of another bug in the qca8k driver, functionality of the reset pin depends on its pre-kernel configuration. See commit c44fc98f0a8f ("net: dsa: qca8k: fix illegal usage of GPIO") The problem did not appear until we removed support for the switch and configuration of its reset pin from the bootloader. To fix that, properly describe the internal mdio-bus configuration of the qca8334 switch. The PHYs are internal to the switch and sit on its internal mdio-bus. Fixes: 7da7b84fee58 ("ARM: dts: imx6dl-yapp4: Move phy reset into switch node") Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 23 ++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index a655b945bf2df..4b7aee8958923 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -118,14 +118,6 @@ #address-cells = <1>; #size-cells = <0>; - phy_port2: phy@1 { - reg = <1>; - }; - - phy_port3: phy@2 { - reg = <2>; - }; - switch@10 { compatible = "qca,qca8334"; reg = <0x10>; @@ -150,15 +142,30 @@ eth2: port@2 { reg = <2>; label = "eth2"; + phy-mode = "internal"; phy-handle = <&phy_port2>; }; eth1: port@3 { reg = <3>; label = "eth1"; + phy-mode = "internal"; phy-handle = <&phy_port3>; }; }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy_port2: ethernet-phy@1 { + reg = <1>; + }; + + phy_port3: ethernet-phy@2 { + reg = <2>; + }; + }; }; }; }; -- GitLab From 24cc77b670ad74b67ff814d84dc87ce9fd2f5551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 23 Jan 2024 13:22:58 +0100 Subject: [PATCH 0848/1418] arm64: dts: marvell: reorder crypto interrupts on Armada SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ec55a22149d64f9ac41845d923b884d4a666bf4d ] Match order specified in binding documentation. It says "mem" should be the last interrupt. This fixes: arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:0: 'ring0' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:1: 'ring1' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:2: 'ring2' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:3: 'ring3' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:4: 'eip' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:5: 'mem' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# Signed-off-by: Rafał Miłecki Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 10 +++++----- arch/arm64/boot/dts/marvell/armada-cp11x.dtsi | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index df152c72276b8..cd28e1c45b70a 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -426,14 +426,14 @@ crypto: crypto@90000 { compatible = "inside-secure,safexcel-eip97ies"; reg = <0x90000 0x20000>; - interrupts = , - , + interrupts = , , , , - ; - interrupt-names = "mem", "ring0", "ring1", - "ring2", "ring3", "eip"; + , + ; + interrupt-names = "ring0", "ring1", "ring2", + "ring3", "eip", "mem"; clocks = <&nb_periph_clk 15>; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi index d6c0990a267d9..218c059b16d9c 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi @@ -506,14 +506,14 @@ CP11X_LABEL(crypto): crypto@800000 { compatible = "inside-secure,safexcel-eip197b"; reg = <0x800000 0x200000>; - interrupts = <87 IRQ_TYPE_LEVEL_HIGH>, - <88 IRQ_TYPE_LEVEL_HIGH>, + interrupts = <88 IRQ_TYPE_LEVEL_HIGH>, <89 IRQ_TYPE_LEVEL_HIGH>, <90 IRQ_TYPE_LEVEL_HIGH>, <91 IRQ_TYPE_LEVEL_HIGH>, - <92 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "mem", "ring0", "ring1", - "ring2", "ring3", "eip"; + <92 IRQ_TYPE_LEVEL_HIGH>, + <87 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "ring0", "ring1", "ring2", "ring3", + "eip", "mem"; clock-names = "core", "reg"; clocks = <&CP11X_LABEL(clk) 1 26>, <&CP11X_LABEL(clk) 1 17>; -- GitLab From 804db3c1ad5c53ffac8f66fa8a94a13c77f18537 Mon Sep 17 00:00:00 2001 From: David McFarland Date: Wed, 3 Jan 2024 12:55:18 -0400 Subject: [PATCH 0849/1418] ACPI: resource: Add Infinity laptops to irq1_edge_low_force_override [ Upstream commit e2605d4039a42a03000856b3229932455717b48b ] A user reported a keyboard problem similar to ones reported with other Zen laptops, on an Infinity E15-5A165-BM. Add board name matches for this model and one (untested) close relative to irq1_edge_low_force_override. Link: https://lemmy.ml/post/9864736 Link: https://www.infinitygaming.com.au/bios/ Link: https://lore.kernel.org/linux-acpi/20231006123304.32686-1-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki Stable-dep-of: 021a67d09615 ("ACPI: resource: Add MAIBENBEN X577 to irq1_edge_low_force_override") Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 5ebeb0d7b6be0..35a98a5916f63 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -543,6 +543,18 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "17U70P"), }, }, + { + /* Infinity E15-5A165-BM */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM5RG1E0009COM"), + }, + }, + { + /* Infinity E15-5A305-1M */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM5RGEE0016COM"), + }, + }, { } }; -- GitLab From c259c196ebd893cda95c435c2e9651924f9d48a5 Mon Sep 17 00:00:00 2001 From: "Alexey I. Froloff" Date: Fri, 16 Feb 2024 12:30:09 +0000 Subject: [PATCH 0850/1418] ACPI: resource: Do IRQ override on Lunnen Ground laptops [ Upstream commit e23ad54fef186aa66007895be1382c88f1ee2bf7 ] The Lunnen Ground 15 and 16 needs IRQ overriding for the keyboard to work. Adding an entries for these laptops to the override_table makes the internal keyboard functional. Signed-off-by: Alexey I. Froloff Signed-off-by: Rafael J. Wysocki Stable-dep-of: 021a67d09615 ("ACPI: resource: Add MAIBENBEN X577 to irq1_edge_low_force_override") Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 35a98a5916f63..8420d97287f86 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -555,6 +555,20 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "GM5RGEE0016COM"), }, }, + { + /* Lunnen Ground 15 / AMD Ryzen 5 5500U */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lunnen"), + DMI_MATCH(DMI_BOARD_NAME, "LLL5DAW"), + }, + }, + { + /* Lunnen Ground 16 / AMD Ryzen 7 5800U */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lunnen"), + DMI_MATCH(DMI_BOARD_NAME, "LL6FA"), + }, + }, { } }; -- GitLab From ed86e1fa7bad5eca03fae1673c122dfd3115678b Mon Sep 17 00:00:00 2001 From: Maxim Kudinov Date: Fri, 23 Feb 2024 19:24:08 +0300 Subject: [PATCH 0851/1418] ACPI: resource: Add MAIBENBEN X577 to irq1_edge_low_force_override [ Upstream commit 021a67d096154893cd1d883c7be0097e2ee327fd ] A known issue on some Zen laptops, keyboard stopped working due to commit 9946e39fe8d0 fael@kernel.org("ACPI: resource: skip IRQ override on AMD Zen platforms") on kernel 5.19.10. The ACPI IRQ override is required for this board due to buggy DSDT, thus adding the board vendor and name to irq1_edge_low_force_override fixes the issue. Fixes: 9946e39fe8d0 ("ACPI: resource: skip IRQ override on AMD Zen platforms") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217394 Link: https://lore.kernel.org/linux-acpi/20231006123304.32686-1-hdegoede@redhat.com/ Tested-by: Maxim Trofimov Signed-off-by: Maxim Kudinov Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8420d97287f86..1c5c1a269fbee 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -569,6 +569,13 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "LL6FA"), }, }, + { + /* MAIBENBEN X577 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MAIBENBEN"), + DMI_MATCH(DMI_BOARD_NAME, "X577"), + }, + }, { } }; -- GitLab From 5bd963ff48e899c9f09d24c5ea58a77d65e4c96e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Feb 2024 17:35:27 +0100 Subject: [PATCH 0852/1418] ACPI: scan: Fix device check notification handling [ Upstream commit 793551c965116d9dfaf0550dacae1396a20efa69 ] It is generally invalid to fail a Device Check notification if the scan handler has not been attached to the given device after a bus rescan, because there may be valid reasons for the scan handler to refuse attaching to the device (for example, the device is not ready). For this reason, modify acpi_scan_device_check() to return 0 in that case without printing a warning. While at it, reduce the log level of the "already enumerated" message in the same function, because it is only interesting when debugging notification handling Fixes: 443fc8202272 ("ACPI / hotplug: Rework generic code to handle suprise removals") Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/acpi/scan.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 94154a849a3ea..293cdf486fd81 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -315,18 +315,14 @@ static int acpi_scan_device_check(struct acpi_device *adev) * again). */ if (adev->handler) { - dev_warn(&adev->dev, "Already enumerated\n"); - return -EALREADY; + dev_dbg(&adev->dev, "Already enumerated\n"); + return 0; } error = acpi_bus_scan(adev->handle); if (error) { dev_warn(&adev->dev, "Namespace scan failure\n"); return error; } - if (!adev->handler) { - dev_warn(&adev->dev, "Enumeration failure\n"); - error = -ENODEV; - } } else { error = acpi_scan_device_not_present(adev); } -- GitLab From 9e8486e46f8b0ef4a7d11c3402c2cea9b073ee13 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 27 Feb 2024 18:35:25 +0100 Subject: [PATCH 0853/1418] arm64: dts: rockchip: add missing interrupt-names for rk356x vdpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1c44d9afa6f89aa0e10a191f30868eb12cd719f ] The video-codec@fdea0400 was missing the interrupt-names property that is part of the binding. Add it. Fixes: 944be6fba401 ("arm64: dts: rockchip: Add VPU support for RK3568/RK3566") Cc: Piotr Oniszczuk Acked-by: Uwe Kleine-König Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240227173526.710056-1-heiko@sntech.de Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index f4d6dbbbddcd4..99ad6fc51b584 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -596,6 +596,7 @@ compatible = "rockchip,rk3568-vpu"; reg = <0x0 0xfdea0000 0x0 0x800>; interrupts = ; + interrupt-names = "vdpu"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; clock-names = "aclk", "hclk"; iommus = <&vdpu_mmu>; -- GitLab From 47635b112a64b7b208224962471e7e42f110e723 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 27 Feb 2024 09:51:12 -0800 Subject: [PATCH 0854/1418] x86, relocs: Ignore relocations in .notes section [ Upstream commit aaa8736370db1a78f0e8434344a484f9fd20be3b ] When building with CONFIG_XEN_PV=y, .text symbols are emitted into the .notes section so that Xen can find the "startup_xen" entry point. This information is used prior to booting the kernel, so relocations are not useful. In fact, performing relocations against the .notes section means that the KASLR base is exposed since /sys/kernel/notes is world-readable. To avoid leaking the KASLR base without breaking unprivileged tools that are expecting to read /sys/kernel/notes, skip performing relocations in the .notes section. The values readable in .notes are then identical to those found in System.map. Reported-by: Guixiong Wei Closes: https://lore.kernel.org/all/20240218073501.54555-1-guixiongwei@gmail.com/ Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Fixes: da1a679cde9b ("Add /sys/kernel/notes") Reviewed-by: Juergen Gross Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- arch/x86/tools/relocs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 2925074b9a588..9a5b101c45023 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -653,6 +653,14 @@ static void print_absolute_relocs(void) if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } + /* + * Do not perform relocations in .notes section; any + * values there are meant for pre-boot consumption (e.g. + * startup_xen). + */ + if (sec_applies->shdr.sh_type == SHT_NOTE) { + continue; + } sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { -- GitLab From 934212a623cbab851848b6de377eb476718c3e4c Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Tue, 2 Jan 2024 13:38:13 +0800 Subject: [PATCH 0855/1418] SUNRPC: fix some memleaks in gssx_dec_option_array [ Upstream commit 3cfcfc102a5e57b021b786a755a38935e357797d ] The creds and oa->data need to be freed in the error-handling paths after their allocation. So this patch add these deallocations in the corresponding paths. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Signed-off-by: Zhipeng Lu Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index d79f12c2550ac..cb32ab9a83952 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { - kfree(oa->data); - return -ENOMEM; + err = -ENOMEM; + goto free_oa; } oa->data[0].option.data = CREDS_VALUE; @@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, /* option buffer */ p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - return -ENOSPC; + if (unlikely(p == NULL)) { + err = -ENOSPC; + goto free_creds; + } length = be32_to_cpup(p); p = xdr_inline_decode(xdr, length); - if (unlikely(p == NULL)) - return -ENOSPC; + if (unlikely(p == NULL)) { + err = -ENOSPC; + goto free_creds; + } if (length == sizeof(CREDS_VALUE) && memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) { /* We have creds here. parse them */ err = gssx_dec_linux_creds(xdr, creds); if (err) - return err; + goto free_creds; oa->data[0].value.len = 1; /* presence */ } else { /* consume uninteresting buffer */ err = gssx_dec_buffer(xdr, &dummy); if (err) - return err; + goto free_creds; } } return 0; + +free_creds: + kfree(creds); +free_oa: + kfree(oa->data); + oa->data = NULL; + return err; } static int gssx_dec_status(struct xdr_stream *xdr, -- GitLab From 216712c69846f197cba22e8359825033c283abd2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 26 Feb 2024 22:37:39 +0100 Subject: [PATCH 0856/1418] mmc: wmt-sdmmc: remove an incorrect release_mem_region() call in the .remove function [ Upstream commit ae5004a40a262d329039b99b62bd3fe7645b66ad ] This looks strange to call release_mem_region() in a remove function without any request_mem_region() in the probe or "struct resource" somewhere. So remove the corresponding code. Fixes: 3a96dff0f828 ("mmc: SD/MMC Host Controller for Wondermedia WM8505/WM8650") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/bb0bb1ed1e18de55e8c0547625bde271e64b8c31.1708983064.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/wmt-sdmmc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c index 9aa3027ca25e4..f2abebb2d8574 100644 --- a/drivers/mmc/host/wmt-sdmmc.c +++ b/drivers/mmc/host/wmt-sdmmc.c @@ -886,7 +886,6 @@ static int wmt_mci_remove(struct platform_device *pdev) { struct mmc_host *mmc; struct wmt_mci_priv *priv; - struct resource *res; u32 reg_tmp; mmc = platform_get_drvdata(pdev); @@ -914,9 +913,6 @@ static int wmt_mci_remove(struct platform_device *pdev) clk_disable_unprepare(priv->clk_sdmmc); clk_put(priv->clk_sdmmc); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - mmc_free_host(mmc); dev_info(&pdev->dev, "WMT MCI device removed\n"); -- GitLab From 33fb18efaedef32d282c00596fa43ec906ccdd97 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 8 Feb 2024 11:46:28 +0800 Subject: [PATCH 0857/1418] ACPI: CPPC: enable AMD CPPC V2 support for family 17h processors [ Upstream commit a51ab63b297ce9e26e3ffb9be896018a42d5f32f ] As there are some AMD processors which only support CPPC V2 firmware and BIOS implementation, the amd_pstate driver will be failed to load when system booting with below kernel warning message: [ 0.477523] amd_pstate: the _CPC object is not present in SBIOS or ACPI disabled To make the amd_pstate driver can be loaded on those TR40 processors, it needs to match x86_model from 0x30 to 0x7F for family 17H. With the change, the system can load amd_pstate driver as expected. Reviewed-by: Mario Limonciello Reported-by: Gino Badouri Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218171 Fixes: fbd74d1689 ("ACPI: CPPC: Fix enabling CPPC on AMD systems with shared memory") Signed-off-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- arch/x86/kernel/acpi/cppc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8d8752b44f113..ff8f25faca3dd 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -20,7 +20,7 @@ bool cpc_supported_by_cpu(void) (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) return true; else if (boot_cpu_data.x86 == 0x17 && - boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + boot_cpu_data.x86_model >= 0x30 && boot_cpu_data.x86_model <= 0x7f) return true; return boot_cpu_has(X86_FEATURE_CPPC); } -- GitLab From 2c727f83eb065b87cce306024b3b57d245b5129e Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 1 Mar 2024 00:35:09 +0200 Subject: [PATCH 0858/1418] wifi: rtw88: 8821c: Fix beacon loss and disconnect [ Upstream commit e1dfa21427baeb813f9a2f9ceab6b7d32c3ca425 ] Tenda U9 V2.0, which contains RTL8811CU, is practically unusable because of frequent disconnections: Feb 23 14:46:45 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-BEACON-LOSS Feb 23 14:46:46 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-DISCONNECTED bssid=90:55:de:__:__:__ reason=4 locally_generated=1 Feb 23 14:46:52 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-CONNECTED - Connection to 90:55:de:__:__:__ completed [id=0 id_str=] Feb 23 14:46:54 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-BEACON-LOSS Feb 23 14:46:55 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-DISCONNECTED bssid=90:55:de:__:__:__ reason=4 locally_generated=1 Feb 23 14:47:01 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-CONNECTED - Connection to 90:55:de:__:__:__ completed [id=0 id_str=] Feb 23 14:47:04 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-BEACON-LOSS Feb 23 14:47:05 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-DISCONNECTED bssid=90:55:de:__:__:__ reason=4 locally_generated=1 This is caused by a mistake in the chip initialisation. This version of the chip requires loading an extra AGC table right after the main one, but the extra table is being loaded at the wrong time, in rtw_chip_board_info_setup(). Move the extra AGC table loading to the right place, in rtw_phy_load_tables(). The rtw_chip_board_info_setup() can only do "software" things, and rtw_phy_load_tables() can really do IO. Fixes: 5d6651fe8583 ("rtw88: 8821c: support RFE type2 wifi NIC") Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/276c31d8-b9a8-4e54-a3ac-09b74657aff7@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/main.c | 2 -- drivers/net/wireless/realtek/rtw88/phy.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 4c8164db4a9e4..81f3112923f1c 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1989,8 +1989,6 @@ static int rtw_chip_board_info_setup(struct rtw_dev *rtwdev) rtw_phy_setup_phy_cond(rtwdev, 0); rtw_phy_init_tx_power(rtwdev); - if (rfe_def->agc_btg_tbl) - rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_table(rtwdev, rfe_def->phy_pg_tbl); rtw_load_table(rtwdev, rfe_def->txpwr_lmt_tbl); rtw_phy_tx_power_by_rate_config(hal); diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c index bd7d05e080848..fde7b532bc07e 100644 --- a/drivers/net/wireless/realtek/rtw88/phy.c +++ b/drivers/net/wireless/realtek/rtw88/phy.c @@ -1761,12 +1761,15 @@ static void rtw_load_rfk_table(struct rtw_dev *rtwdev) void rtw_phy_load_tables(struct rtw_dev *rtwdev) { + const struct rtw_rfe_def *rfe_def = rtw_get_rfe_def(rtwdev); const struct rtw_chip_info *chip = rtwdev->chip; u8 rf_path; rtw_load_table(rtwdev, chip->mac_tbl); rtw_load_table(rtwdev, chip->bb_tbl); rtw_load_table(rtwdev, chip->agc_tbl); + if (rfe_def->agc_btg_tbl) + rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_rfk_table(rtwdev); for (rf_path = 0; rf_path < rtwdev->hal.rf_path_num; rf_path++) { -- GitLab From eeaa98f34d84b2f07d2b23ddec118aa409d9b7eb Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 1 Mar 2024 00:35:58 +0200 Subject: [PATCH 0859/1418] wifi: rtw88: 8821c: Fix false alarm count [ Upstream commit c238adbc578eeb70cbc8fdd1bef3666b0f585b13 ] total_fa_cnt is supposed to include cck_fa_cnt and ofdm_fa_cnt, not just ofdm_fa_cnt. Fixes: 960361238b86 ("rtw88: 8821c: add false alarm statistics") Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/f3cb6d17-e4e4-44a7-9c9b-72aed994b5c9@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 609a2b86330d8..50e3e46f7d8aa 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -674,9 +674,9 @@ static void rtw8821c_false_alarm_statistics(struct rtw_dev *rtwdev) dm_info->cck_fa_cnt = cck_fa_cnt; dm_info->ofdm_fa_cnt = ofdm_fa_cnt; + dm_info->total_fa_cnt = ofdm_fa_cnt; if (cck_enable) dm_info->total_fa_cnt += cck_fa_cnt; - dm_info->total_fa_cnt = ofdm_fa_cnt; crc32_cnt = rtw_read32(rtwdev, REG_CRC_CCK); dm_info->cck_ok_cnt = FIELD_GET(GENMASK(15, 0), crc32_cnt); -- GitLab From 48fba9d7f5716f9ed7f6b5b914bc191e3ee4d805 Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:14 +0800 Subject: [PATCH 0860/1418] PCI: Make pci_dev_is_disconnected() helper public for other drivers [ Upstream commit 39714fd73c6b60a8d27bcc5b431afb0828bf4434 ] Make pci_dev_is_disconnected() public so that it can be called from Intel VT-d driver to quickly fix/workaround the surprise removal unplug hang issue for those ATS capable devices on PCIe switch downstream hotplug capable ports. Beside pci_device_is_present() function, this one has no config space space access, so is light enough to optimize the normal pure surprise removal and safe removal flow. Acked-by: Bjorn Helgaas Reviewed-by: Dan Carpenter Tested-by: Haorong Ye Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-2-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Stable-dep-of: 4fc82cd907ac ("iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected") Signed-off-by: Sasha Levin --- drivers/pci/pci.h | 5 ----- include/linux/pci.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index e1d02b7c60294..9950deeb047a7 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -357,11 +357,6 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) return 0; } -static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) -{ - return dev->error_state == pci_channel_io_perm_failure; -} - /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 diff --git a/include/linux/pci.h b/include/linux/pci.h index eccaf1abea79d..f5d89a4b811f1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2355,6 +2355,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, -- GitLab From 34a7b30f56d30114bf4d436e4dc793afe326fbcf Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:15 +0800 Subject: [PATCH 0861/1418] iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected [ Upstream commit 4fc82cd907ac075648789cc3a00877778aa1838b ] For those endpoint devices connect to system via hotplug capable ports, users could request a hot reset to the device by flapping device's link through setting the slot's link control register, as pciehp_ist() DLLSC interrupt sequence response, pciehp will unload the device driver and then power it off. thus cause an IOMMU device-TLB invalidation (Intel VT-d spec, or ATS Invalidation in PCIe spec r6.1) request for non-existence target device to be sent and deadly loop to retry that request after ITE fault triggered in interrupt context. That would cause following continuous hard lockup warning and system hang [ 4211.433662] pcieport 0000:17:01.0: pciehp: Slot(108): Link Down [ 4211.433664] pcieport 0000:17:01.0: pciehp: Slot(108): Card not present [ 4223.822591] NMI watchdog: Watchdog detected hard LOCKUP on cpu 144 [ 4223.822622] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S OE kernel version xxxx [ 4223.822623] Hardware name: vendorname xxxx 666-106, BIOS 01.01.02.03.01 05/15/2023 [ 4223.822623] RIP: 0010:qi_submit_sync+0x2c0/0x490 [ 4223.822624] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 1 0 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 [ 4223.822624] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 [ 4223.822625] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 [ 4223.822625] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 [ 4223.822625] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 [ 4223.822626] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 [ 4223.822626] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 [ 4223.822626] FS: 0000000000000000(0000) GS:ffffa237ae400000(0000) knlGS:0000000000000000 [ 4223.822627] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4223.822627] CR2: 00007ffe86515d80 CR3: 000002fd3000a001 CR4: 0000000000770ee0 [ 4223.822627] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4223.822628] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 4223.822628] PKRU: 55555554 [ 4223.822628] Call Trace: [ 4223.822628] qi_flush_dev_iotlb+0xb1/0xd0 [ 4223.822628] __dmar_remove_one_dev_info+0x224/0x250 [ 4223.822629] dmar_remove_one_dev_info+0x3e/0x50 [ 4223.822629] intel_iommu_release_device+0x1f/0x30 [ 4223.822629] iommu_release_device+0x33/0x60 [ 4223.822629] iommu_bus_notifier+0x7f/0x90 [ 4223.822630] blocking_notifier_call_chain+0x60/0x90 [ 4223.822630] device_del+0x2e5/0x420 [ 4223.822630] pci_remove_bus_device+0x70/0x110 [ 4223.822630] pciehp_unconfigure_device+0x7c/0x130 [ 4223.822631] pciehp_disable_slot+0x6b/0x100 [ 4223.822631] pciehp_handle_presence_or_link_change+0xd8/0x320 [ 4223.822631] pciehp_ist+0x176/0x180 [ 4223.822631] ? irq_finalize_oneshot.part.50+0x110/0x110 [ 4223.822632] irq_thread_fn+0x19/0x50 [ 4223.822632] irq_thread+0x104/0x190 [ 4223.822632] ? irq_forced_thread_fn+0x90/0x90 [ 4223.822632] ? irq_thread_check_affinity+0xe0/0xe0 [ 4223.822633] kthread+0x114/0x130 [ 4223.822633] ? __kthread_cancel_work+0x40/0x40 [ 4223.822633] ret_from_fork+0x1f/0x30 [ 4223.822633] Kernel panic - not syncing: Hard LOCKUP [ 4223.822634] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S OE kernel version xxxx [ 4223.822634] Hardware name: vendorname xxxx 666-106, BIOS 01.01.02.03.01 05/15/2023 [ 4223.822634] Call Trace: [ 4223.822634] [ 4223.822635] dump_stack+0x6d/0x88 [ 4223.822635] panic+0x101/0x2d0 [ 4223.822635] ? ret_from_fork+0x11/0x30 [ 4223.822635] nmi_panic.cold.14+0xc/0xc [ 4223.822636] watchdog_overflow_callback.cold.8+0x6d/0x81 [ 4223.822636] __perf_event_overflow+0x4f/0xf0 [ 4223.822636] handle_pmi_common+0x1ef/0x290 [ 4223.822636] ? __set_pte_vaddr+0x28/0x40 [ 4223.822637] ? flush_tlb_one_kernel+0xa/0x20 [ 4223.822637] ? __native_set_fixmap+0x24/0x30 [ 4223.822637] ? ghes_copy_tofrom_phys+0x70/0x100 [ 4223.822637] ? __ghes_peek_estatus.isra.16+0x49/0xa0 [ 4223.822637] intel_pmu_handle_irq+0xba/0x2b0 [ 4223.822638] perf_event_nmi_handler+0x24/0x40 [ 4223.822638] nmi_handle+0x4d/0xf0 [ 4223.822638] default_do_nmi+0x49/0x100 [ 4223.822638] exc_nmi+0x134/0x180 [ 4223.822639] end_repeat_nmi+0x16/0x67 [ 4223.822639] RIP: 0010:qi_submit_sync+0x2c0/0x490 [ 4223.822639] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 10 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 [ 4223.822640] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 [ 4223.822640] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 [ 4223.822640] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 [ 4223.822641] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 [ 4223.822641] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 [ 4223.822641] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 [ 4223.822641] ? qi_submit_sync+0x2c0/0x490 [ 4223.822642] ? qi_submit_sync+0x2c0/0x490 [ 4223.822642] [ 4223.822642] qi_flush_dev_iotlb+0xb1/0xd0 [ 4223.822642] __dmar_remove_one_dev_info+0x224/0x250 [ 4223.822643] dmar_remove_one_dev_info+0x3e/0x50 [ 4223.822643] intel_iommu_release_device+0x1f/0x30 [ 4223.822643] iommu_release_device+0x33/0x60 [ 4223.822643] iommu_bus_notifier+0x7f/0x90 [ 4223.822644] blocking_notifier_call_chain+0x60/0x90 [ 4223.822644] device_del+0x2e5/0x420 [ 4223.822644] pci_remove_bus_device+0x70/0x110 [ 4223.822644] pciehp_unconfigure_device+0x7c/0x130 [ 4223.822644] pciehp_disable_slot+0x6b/0x100 [ 4223.822645] pciehp_handle_presence_or_link_change+0xd8/0x320 [ 4223.822645] pciehp_ist+0x176/0x180 [ 4223.822645] ? irq_finalize_oneshot.part.50+0x110/0x110 [ 4223.822645] irq_thread_fn+0x19/0x50 [ 4223.822646] irq_thread+0x104/0x190 [ 4223.822646] ? irq_forced_thread_fn+0x90/0x90 [ 4223.822646] ? irq_thread_check_affinity+0xe0/0xe0 [ 4223.822646] kthread+0x114/0x130 [ 4223.822647] ? __kthread_cancel_work+0x40/0x40 [ 4223.822647] ret_from_fork+0x1f/0x30 [ 4223.822647] Kernel Offset: 0x6400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Such issue could be triggered by all kinds of regular surprise removal hotplug operation. like: 1. pull EP(endpoint device) out directly. 2. turn off EP's power. 3. bring the link down. etc. this patch aims to work for regular safe removal and surprise removal unplug. these hot unplug handling process could be optimized for fix the ATS Invalidation hang issue by calling pci_dev_is_disconnected() in function devtlb_invalidation_with_pasid() to check target device state to avoid sending meaningless ATS Invalidation request to iommu when device is gone. (see IMPLEMENTATION NOTE in PCIe spec r6.1 section 10.3.1) For safe removal, device wouldn't be removed until the whole software handling process is done, it wouldn't trigger the hard lock up issue caused by too long ATS Invalidation timeout wait. In safe removal path, device state isn't set to pci_channel_io_perm_failure in pciehp_unconfigure_device() by checking 'presence' parameter, calling pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() will return false there, wouldn't break the function. For surprise removal, device state is set to pci_channel_io_perm_failure in pciehp_unconfigure_device(), means device is already gone (disconnected) call pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() will return true to break the function not to send ATS Invalidation request to the disconnected device blindly, thus avoid to trigger further ITE fault, and ITE fault will block all invalidation request to be handled. furthermore retry the timeout request could trigger hard lockup. safe removal (present) & surprise removal (not present) pciehp_ist() pciehp_handle_presence_or_link_change() pciehp_disable_slot() remove_board() pciehp_unconfigure_device(presence) { if (!presence) pci_walk_bus(parent, pci_dev_set_disconnected, NULL); } this patch works for regular safe removal and surprise removal of ATS capable endpoint on PCIe switch downstream ports. Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table interface") Reviewed-by: Dan Carpenter Tested-by: Haorong Ye Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-3-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/pasid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3f03039e5cce5..32432d82d7744 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -435,6 +435,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, if (!info || !info->ats_enabled) return; + if (pci_dev_is_disconnected(to_pci_dev(dev))) + return; + sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; pfsid = info->pfsid; -- GitLab From 8499af0616cf76e6cbe811107e3f5b33bd472041 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 20 Feb 2024 15:57:11 -0800 Subject: [PATCH 0862/1418] igb: Fix missing time sync events [ Upstream commit ee14cc9ea19ba9678177e2224a9c58cce5937c73 ] Fix "double" clearing of interrupts, which can cause external events or timestamps to be missed. The E1000_TSIRC Time Sync Interrupt Cause register can be cleared in two ways, by either reading it or by writing '1' into the specific cause bit. This is documented in section 8.16.1. The following flow was used: 1. read E1000_TSIRC into 'tsicr'; 2. handle the interrupts present into 'tsirc' and mark them in 'ack'; 3. write 'ack' into E1000_TSICR; As both (1) and (3) will clear the interrupt cause, if the same interrupt happens again between (1) and (3) it will be ignored, causing events to be missed. Remove the extra clear in (3). Fixes: 00c65578b47b ("igb: enable internal PPS for the i210") Acked-by: Richard Cochran Signed-off-by: Vinicius Costa Gomes Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 45ce4ed16146e..81d9a5338be5e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6926,44 +6926,31 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 ack = 0, tsicr = rd32(E1000_TSICR); + u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) ptp_clock_event(adapter->ptp_clock, &event); - ack |= TSINTR_SYS_WRAP; } if (tsicr & E1000_TSICR_TXTS) { /* retrieve hardware timestamp */ schedule_work(&adapter->ptp_tx_work); - ack |= E1000_TSICR_TXTS; } - if (tsicr & TSINTR_TT0) { + if (tsicr & TSINTR_TT0) igb_perout(adapter, 0); - ack |= TSINTR_TT0; - } - if (tsicr & TSINTR_TT1) { + if (tsicr & TSINTR_TT1) igb_perout(adapter, 1); - ack |= TSINTR_TT1; - } - if (tsicr & TSINTR_AUTT0) { + if (tsicr & TSINTR_AUTT0) igb_extts(adapter, 0); - ack |= TSINTR_AUTT0; - } - if (tsicr & TSINTR_AUTT1) { + if (tsicr & TSINTR_AUTT1) igb_extts(adapter, 1); - ack |= TSINTR_AUTT1; - } - - /* acknowledge the interrupts */ - wr32(E1000_TSICR, ack); } static irqreturn_t igb_msix_other(int irq, void *data) -- GitLab From e5f04ec4421752afb7707d2c0dfdcc470f548eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Sun, 7 Jan 2024 19:02:47 +0100 Subject: [PATCH 0863/1418] Bluetooth: Remove HCI_POWER_OFF_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 968667f2e0345a67a6eea5a502f4659085666564 ] With commit cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED"), the power off sequence got refactored so that this timeout was no longer necessary, let's remove the leftover define from the header too. Fixes: cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a674221d151db..c69e09909449f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -416,7 +416,6 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ -#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ -- GitLab From 8beed376c9195427c750a0072ca4c323b0e9b016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Sun, 7 Jan 2024 19:02:48 +0100 Subject: [PATCH 0864/1418] Bluetooth: mgmt: Remove leftover queuing of power_off work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fee054b7579fe252f8b9e6c17b9c5bfdaa84dd7e ] Queuing of power_off work was introduced in these functions with commits 8b064a3ad377 ("Bluetooth: Clean up HCI state when doing power off") and c9910d0fb4fc ("Bluetooth: Fix disconnecting connections in non-connected states") in an effort to clean up state and do things like disconnecting devices before actually powering off the device. After that, commit a3172b7eb4a2 ("Bluetooth: Add timer to force power off") introduced a timeout to ensure that the device actually got powered off, even if some of the cleanup work would never complete. This code later got refactored with commit cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED"), which made powering off the device synchronous and removed the need for initiating the power_off work from other places. The timeout mentioned above got removed too, because we now also made use of the command timeout during power on/off. These days the power_off work still exists, but it only seems to only be used for HCI_AUTO_OFF functionality, which is why we never noticed those two leftover places where we queue power_off work. So let's remove that code. Fixes: cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ab63f807e3c80..a80bf9c42c2ef 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9766,14 +9766,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - if (!mgmt_connected) return; @@ -9830,14 +9822,6 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, { struct mgmt_ev_connect_failed ev; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.status = mgmt_status(status); -- GitLab From 653a17a99d752ffde175d4bc96154f2a3642f400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Mon, 8 Jan 2024 23:46:06 +0100 Subject: [PATCH 0865/1418] Bluetooth: Remove superfluous call to hci_conn_check_pending() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 78e3639fc8031275010c3287ac548c0bc8de83b1 ] The "pending connections" feature was originally introduced with commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests") and 6bd57416127e ("[Bluetooth] Handling pending connect attempts after inquiry") to handle controllers supporting only a single connection request at a time. Later things were extended to also cancel ongoing inquiries on connect() with commit 89e65975fea5 ("Bluetooth: Cancel Inquiry before Create Connection"). With commit a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes"), hci_conn_check_pending() was introduced as a helper to consolidate a few places where we check for pending connections (indicated by the BT_CONNECT2 flag) and then try to connect. This refactoring commit also snuck in two more calls to hci_conn_check_pending(): - One is in the failure callback of hci_cs_inquiry(), this one probably makes sense: If we send an "HCI Inquiry" command and then immediately after a "Create Connection" command, the "Create Connection" command might fail before the "HCI Inquiry" command, and then we want to retry the "Create Connection" on failure of the "HCI Inquiry". - The other added call to hci_conn_check_pending() is in the event handler for the "Remote Name" event, this seems unrelated and is possibly a copy-paste error, so remove that one. Fixes: a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 452d839c152fc..0cd093ec6486c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3567,8 +3567,6 @@ static void hci_remote_name_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - hci_conn_check_pending(hdev); - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); -- GitLab From a96738eb215f4f685b16f7f19532fd0342dfdb51 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 8 Feb 2024 17:40:17 +0100 Subject: [PATCH 0866/1418] Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional() [ Upstream commit 56d074d26c5828773b00b2185dd7e1d08273b8e8 ] The optional variants for the gpiod_get() family of functions return NULL if the GPIO in question is not associated with this device. They return ERR_PTR() on any other error. NULL descriptors are graciously handled by GPIOLIB and can be safely passed to any of the GPIO consumer interfaces as they will return 0 and act as if the function succeeded. If one is using the optional variant, then there's no point in checking for NULL. Fixes: 6845667146a2 ("Bluetooth: hci_qca: Fix NULL vs IS_ERR_OR_NULL check in qca_serdev_probe") Signed-off-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8bfef7f81b417..2acda547f4f3e 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2254,7 +2254,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && + if (IS_ERR(qcadev->bt_en) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); @@ -2263,7 +2263,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && + if (IS_ERR(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855 || data->soc_type == QCA_WCN7850)) @@ -2285,7 +2285,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en)) { + if (IS_ERR(qcadev->bt_en)) { dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); power_ctrl_enabled = false; } -- GitLab From 1023de27cd1d0d692e70fe6d6d5cee9fff9b9c84 Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 20 Apr 2023 20:23:36 +0800 Subject: [PATCH 0867/1418] Bluetooth: Cancel sync command before suspend and power off [ Upstream commit f419863588217f76eaf754e1dfce21ea7fcb026d ] Some of the sync commands might take a long time to complete, e.g. LE Create Connection when the peer device isn't responding might take 20 seconds before it times out. If suspend command is issued during this time, it will need to wait for completion since both commands are using the same sync lock. This patch cancel any running sync commands before attempting to suspend or adapter power off. Signed-off-by: Archie Pusaka Reviewed-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 3 +++ net/bluetooth/mgmt.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a8932d449eb63..a7e6ce2e61c5e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2838,6 +2838,9 @@ int hci_suspend_dev(struct hci_dev *hdev) if (mgmt_powering_down(hdev)) return 0; + /* Cancel potentially blocking sync operation before suspend */ + __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); hci_req_sync_unlock(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a80bf9c42c2ef..a657dc1d4ec7a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1401,6 +1401,10 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } + /* Cancel potentially blocking sync operation before power off */ + if (cp->val == 0x00) + __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); -- GitLab From ac7a47aaa7944efc94e4fc23cc438b7bd9cc222c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 21 Apr 2023 11:37:55 -0700 Subject: [PATCH 0868/1418] Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running [ Upstream commit d883a4669a1def6d121ccf5e64ad28260d1c9531 ] This makes sure hci_cmd_sync_queue only queue new work if HCI_RUNNING has been set otherwise there is a risk of commands being sent while turning off. Because hci_cmd_sync_queue can no longer queue work while HCI_RUNNING is not set it cannot be used to power on adapters so instead hci_cmd_sync_submit is introduced which bypass the HCI_RUNNING check, so it behaves like the old implementation. Link: https://lore.kernel.org/all/CAB4PzUpDMvdc8j2MdeSAy1KkAE-D3woprCwAdYWeOc-3v3c9Sw@mail.gmail.com/ Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 2 ++ net/bluetooth/hci_sync.c | 25 +++++++++++++++++++++++-- net/bluetooth/mgmt.c | 12 ++++++++---- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 17f5a4c32f36e..2fa976c466b80 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -41,6 +41,8 @@ void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a337340464567..31dd064d77a42 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -680,8 +680,12 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) } EXPORT_SYMBOL(hci_cmd_sync_cancel); -int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, - void *data, hci_cmd_sync_work_destroy_t destroy) +/* Submit HCI command to be run in as cmd_sync_work: + * + * - hdev must _not_ be unregistered + */ +int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; int err = 0; @@ -711,6 +715,23 @@ unlock: mutex_unlock(&hdev->unregister_lock); return err; } +EXPORT_SYMBOL(hci_cmd_sync_submit); + +/* Queue HCI command: + * + * - hdev must be running + */ +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + /* Only queue command if hdev is running which means it had been opened + * and is either on init phase or is already up. + */ + if (!test_bit(HCI_RUNNING, &hdev->flags)) + return -ENETDOWN; + + return hci_cmd_sync_submit(hdev, func, data, destroy); +} EXPORT_SYMBOL(hci_cmd_sync_queue); int hci_update_eir_sync(struct hci_dev *hdev) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a657dc1d4ec7a..732b6cf45fbe4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1402,11 +1402,15 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, } /* Cancel potentially blocking sync operation before power off */ - if (cp->val == 0x00) + if (cp->val == 0x00) { __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); - - err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, - mgmt_set_powered_complete); + err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, + mgmt_set_powered_complete); + } else { + /* Use hci_cmd_sync_submit since hdev might not be running */ + err = hci_cmd_sync_submit(hdev, set_powered_sync, cmd, + mgmt_set_powered_complete); + } if (err < 0) mgmt_pending_remove(cmd); -- GitLab From 6083089ab00631617f9eac678df3ab050a9d837a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Jun 2023 17:25:06 -0700 Subject: [PATCH 0869/1418] Bluetooth: hci_conn: Consolidate code for aborting connections [ Upstream commit a13f316e90fdb1fb6df6582e845aa9b3270f3581 ] This consolidates code for aborting connections using hci_cmd_sync_queue so it is synchronized with other threads, but because of the fact that some commands may block the cmd_sync_queue while waiting specific events this attempt to cancel those requests by using hci_cmd_sync_cancel. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 154 ++++++------------------------- net/bluetooth/hci_sync.c | 23 +++-- net/bluetooth/mgmt.c | 15 +-- 4 files changed, 47 insertions(+), 147 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 09c978f3d95dc..2538f3b96623b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -739,6 +739,7 @@ struct hci_conn { unsigned long flags; enum conn_reasons conn_reason; + __u8 abort_reason; __u32 clock; __u16 clock_accuracy; @@ -758,7 +759,6 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; - struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 12d36875358b9..f752a9f9bb9c7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -175,57 +175,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } -static void le_scan_cleanup(struct work_struct *work) -{ - struct hci_conn *conn = container_of(work, struct hci_conn, - le_scan_cleanup); - struct hci_dev *hdev = conn->hdev; - struct hci_conn *c = NULL; - - BT_DBG("%s hcon %p", hdev->name, conn); - - hci_dev_lock(hdev); - - /* Check that the hci_conn is still around */ - rcu_read_lock(); - list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { - if (c == conn) - break; - } - rcu_read_unlock(); - - if (c == conn) { - hci_connect_le_scan_cleanup(conn, 0x00); - hci_conn_cleanup(conn); - } - - hci_dev_unlock(hdev); - hci_dev_put(hdev); - hci_conn_put(conn); -} - -static void hci_connect_le_scan_remove(struct hci_conn *conn) -{ - BT_DBG("%s hcon %p", conn->hdev->name, conn); - - /* We can't call hci_conn_del/hci_conn_cleanup here since that - * could deadlock with another hci_conn_del() call that's holding - * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). - * Instead, grab temporary extra references to the hci_dev and - * hci_conn and perform the necessary cleanup in a separate work - * callback. - */ - - hci_dev_hold(conn->hdev); - hci_conn_get(conn); - - /* Even though we hold a reference to the hdev, many other - * things might get cleaned up meanwhile, including the hdev's - * own workqueue, so we can't use that for scheduling. - */ - schedule_work(&conn->le_scan_cleanup); -} - static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -672,13 +621,6 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; - /* LE connections in scanning state need special handling */ - if (conn->state == BT_CONNECT && conn->type == LE_LINK && - test_bit(HCI_CONN_SCANNING, &conn->flags)) { - hci_connect_le_scan_remove(conn); - return; - } - hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } @@ -1050,7 +992,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); - INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -2837,81 +2778,46 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } -int hci_abort_conn(struct hci_conn *conn, u8 reason) +static int abort_conn_sync(struct hci_dev *hdev, void *data) { - int r = 0; + struct hci_conn *conn; + u16 handle = PTR_ERR(data); - if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) return 0; - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - if (conn->type == AMP_LINK) { - struct hci_cp_disconn_phy_link cp; + return hci_abort_conn_sync(hdev, conn, conn->abort_reason); +} - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = reason; - r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp); - } else { - struct hci_cp_disconnect dc; +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + struct hci_dev *hdev = conn->hdev; - dc.handle = cpu_to_le16(conn->handle); - dc.reason = reason; - r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, - sizeof(dc), &dc); - } + /* If abort_reason has already been set it means the connection is + * already being aborted so don't attempt to overwrite it. + */ + if (conn->abort_reason) + return 0; - conn->state = BT_DISCONN; + bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason); - break; - case BT_CONNECT: - if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - break; - r = hci_send_cmd(conn->hdev, - HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); - } else if (conn->type == ACL_LINK) { - if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) - break; - r = hci_send_cmd(conn->hdev, - HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - } - break; - case BT_CONNECT2: - if (conn->type == ACL_LINK) { - struct hci_cp_reject_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = reason; - - r = hci_send_cmd(conn->hdev, - HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - struct hci_cp_reject_sync_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - - /* SCO rejection has its own limited set of - * allowed error values (0x0D-0x0F) which isn't - * compatible with most values passed to this - * function. To be safe hard-code one of the - * values that's suitable for SCO. - */ - rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; + conn->abort_reason = reason; - r = hci_send_cmd(conn->hdev, - HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); + /* If the connection is pending check the command opcode since that + * might be blocking on hci_cmd_sync_work while waiting its respective + * event so we need to hci_cmd_sync_cancel to cancel it. + */ + if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + switch (hci_skb_event(hdev->sent_cmd)) { + case HCI_EV_LE_CONN_COMPLETE: + case HCI_EV_LE_ENHANCED_CONN_COMPLETE: + case HCI_EVT_LE_CIS_ESTABLISHED: + hci_cmd_sync_cancel(hdev, -ECANCELED); + break; } - break; - default: - conn->state = BT_CLOSED; - break; } - return r; + return hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 31dd064d77a42..c03729c10fdd6 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5230,22 +5230,27 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, - struct hci_conn *conn) + struct hci_conn *conn, u8 reason) { + /* Return reason if scanning since the connection shall probably be + * cleanup directly. + */ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - return 0; + return reason; - if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + if (conn->role == HCI_ROLE_SLAVE || + test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } -static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) +static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, + u8 reason) { if (conn->type == LE_LINK) - return hci_le_connect_cancel_sync(hdev, conn); + return hci_le_connect_cancel_sync(hdev, conn, reason); if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; @@ -5298,9 +5303,11 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - err = hci_connect_cancel_sync(hdev, conn); + err = hci_connect_cancel_sync(hdev, conn, reason); /* Cleanup hci_conn object if it cannot be cancelled as it - * likelly means the controller and host stack are out of sync. + * likelly means the controller and host stack are out of sync + * or in case of LE it was still scanning so it can be cleanup + * safely. */ if (err) { hci_dev_lock(hdev); @@ -6215,7 +6222,7 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) done: if (err == -ETIMEDOUT) - hci_le_connect_cancel_sync(hdev, conn); + hci_le_connect_cancel_sync(hdev, conn, 0x00); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 732b6cf45fbe4..fbd859e2d13ca 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3583,18 +3583,6 @@ unlock: return err; } -static int abort_conn_sync(struct hci_dev *hdev, void *data) -{ - struct hci_conn *conn; - u16 handle = PTR_ERR(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - - return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); -} - static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3645,8 +3633,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), - NULL); + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); unlock: hci_dev_unlock(hdev); -- GitLab From 1b6cfa4c760e5f3729e21b725c89f6ddb9be5abf Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 9 Jan 2024 13:45:40 -0500 Subject: [PATCH 0870/1418] Bluetooth: hci_core: Cancel request on command timeout [ Upstream commit 63298d6e752fc0ec7f5093860af8bc9f047b30c8 ] If command has timed out call __hci_cmd_sync_cancel to notify the hci_req since it will inevitably cause a timeout. This also rework the code around __hci_cmd_sync_cancel since it was wrongly assuming it needs to cancel timer as well, but sometimes the timers have not been started or in fact they already had timed out in which case they don't need to be cancel yet again. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 2 +- net/bluetooth/hci_core.c | 86 +++++++++++++++++++++----------- net/bluetooth/hci_request.c | 2 +- net/bluetooth/hci_sync.c | 20 ++++---- net/bluetooth/mgmt.c | 2 +- 5 files changed, 72 insertions(+), 40 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 2fa976c466b80..59d15b1a978ab 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -39,7 +39,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err); int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a7e6ce2e61c5e..edf7af2e13557 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1492,10 +1492,11 @@ static void hci_cmd_timeout(struct work_struct *work) cmd_timer.work); if (hdev->sent_cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); + u16 opcode = hci_skb_opcode(hdev->sent_cmd); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); + + hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } @@ -2822,6 +2823,23 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) return ret; } +/* Cancel ongoing command synchronously: + * + * - Cancel command timer + * - Reset command counter + * - Cancel command request + */ +static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) +{ + bt_dev_dbg(hdev, "err 0x%2.2x", err); + + cancel_delayed_work_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->ncmd_timer); + atomic_set(&hdev->cmd_cnt, 1); + + hci_cmd_sync_cancel_sync(hdev, -err); +} + /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { @@ -2839,7 +2857,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, -EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4119,6 +4137,33 @@ static void hci_rx_work(struct work_struct *work) } } +static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) +{ + int err; + + bt_dev_dbg(hdev, "skb %p", skb); + + kfree_skb(hdev->sent_cmd); + + hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); + if (!hdev->sent_cmd) { + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); + return; + } + + err = hci_send_frame(hdev, skb); + if (err < 0) { + hci_cmd_sync_cancel_sync(hdev, err); + return; + } + + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); + + atomic_dec(&hdev->cmd_cnt); +} + static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); @@ -4133,30 +4178,15 @@ static void hci_cmd_work(struct work_struct *work) if (!skb) return; - kfree_skb(hdev->sent_cmd); - - hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); - if (hdev->sent_cmd) { - int res; - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); - atomic_dec(&hdev->cmd_cnt); - - res = hci_send_frame(hdev, skb); - if (res < 0) - __hci_cmd_sync_cancel(hdev, -res); - - rcu_read_lock(); - if (test_bit(HCI_RESET, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - cancel_delayed_work(&hdev->cmd_timer); - else - queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, - HCI_CMD_TIMEOUT); - rcu_read_unlock(); - } else { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } + hci_send_cmd_sync(hdev, skb); + + rcu_read_lock(); + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + cancel_delayed_work(&hdev->cmd_timer); + else + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); } } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index f7e006a363829..4468647df6722 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -916,7 +916,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { - __hci_cmd_sync_cancel(hdev, ENODEV); + hci_cmd_sync_cancel_sync(hdev, ENODEV); cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c03729c10fdd6..d0029f10d9023 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -651,7 +651,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) mutex_unlock(&hdev->cmd_sync_work_lock); } -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -659,15 +659,17 @@ void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - cancel_delayed_work_sync(&hdev->cmd_timer); - cancel_delayed_work_sync(&hdev->ncmd_timer); - atomic_set(&hdev->cmd_cnt, 1); - - wake_up_interruptible(&hdev->req_wait_q); + queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } } +EXPORT_SYMBOL(hci_cmd_sync_cancel); -void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +/* Cancel ongoing command request synchronously: + * + * - Set result and mark status to HCI_REQ_CANCELED + * - Wakeup command sync thread + */ +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -675,10 +677,10 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); + wake_up_interruptible(&hdev->req_wait_q); } } -EXPORT_SYMBOL(hci_cmd_sync_cancel); +EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); /* Submit HCI command to be run in as cmd_sync_work: * diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fbd859e2d13ca..4a35535f56607 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1403,7 +1403,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, /* Cancel potentially blocking sync operation before power off */ if (cp->val == 0x00) { - __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN); err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); } else { -- GitLab From da77c1d39bc527b31890bfa0405763c82828defb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 16 Feb 2024 16:20:11 -0500 Subject: [PATCH 0871/1418] Bluetooth: hci_sync: Fix overwriting request callback [ Upstream commit 2615fd9a7c2507eb3be3fbe49dcec88a2f56454a ] In a few cases the stack may generate commands as responses to events which would happen to overwrite the sent_cmd, so this attempts to store the request in req_skb so even if sent_cmd is replaced with a new command the pending request will remain in stored in req_skb. Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 46 ++++++++++++++++++++++---------- net/bluetooth/hci_event.c | 18 ++++++------- net/bluetooth/hci_sync.c | 21 ++++++++++++--- 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2538f3b96623b..6bc6de5345261 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -549,6 +549,7 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; + struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f752a9f9bb9c7..bac5a369d2bef 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2813,7 +2813,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, -ECANCELED); + hci_cmd_sync_cancel(hdev, ECANCELED); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index edf7af2e13557..e0c924df13b58 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1491,8 +1491,8 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); - if (hdev->sent_cmd) { - u16 opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb) { + u16 opcode = hci_skb_opcode(hdev->req_skb); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); @@ -2792,6 +2792,7 @@ void hci_release_dev(struct hci_dev *hdev) ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); + kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } @@ -3121,21 +3122,33 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) { struct hci_command_hdr *hdr; - if (!hdev->sent_cmd) + if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) return NULL; - hdr = (void *) hdev->sent_cmd->data; + hdr = (void *)skb->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); + return skb->data + HCI_COMMAND_HDR_SIZE; +} - return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; +/* Get data from the previously sent command */ +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +{ + void *data; + + /* Check if opcode matches last sent command */ + data = hci_cmd_data(hdev->sent_cmd, opcode); + if (!data) + /* Check if opcode matches last request */ + data = hci_cmd_data(hdev->req_skb, opcode); + + return data; } /* Get data from last received event */ @@ -4031,17 +4044,19 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, if (!status && !hci_req_is_complete(hdev)) return; + skb = hdev->req_skb; + /* If this was the last command in a request the complete - * callback would be found in hdev->sent_cmd instead of the + * callback would be found in hdev->req_skb instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (skb && bt_cb(skb)->hci.req_complete) { + *req_complete = bt_cb(skb)->hci.req_complete; return; } @@ -4158,8 +4173,11 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); + if (hci_req_status_pend(hdev) && + !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { + kfree_skb(hdev->req_skb); + hdev->req_skb = skb_clone(skb, GFP_KERNEL); + } atomic_dec(&hdev->cmd_cnt); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0cd093ec6486c..6b746ab9f6d21 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4329,7 +4329,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of commands there will not be a command * complete event). */ - if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { + if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) { hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { @@ -7147,10 +7147,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); /* Only match event if command OGF is for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && - hci_skb_event(hdev->sent_cmd) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && + hci_skb_event(hdev->req_skb) == ev->subevent) { + *opcode = hci_skb_opcode(hdev->req_skb); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, req_complete_skb); } @@ -7537,10 +7537,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) } /* Only match event if command OGF is not for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && - hci_skb_event(hdev->sent_cmd) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 && + hci_skb_event(hdev->req_skb) == event) { + hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb), status, &req_complete, &req_complete_skb); req_evt = event; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d0029f10d9023..65b2ad34179f8 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -31,6 +31,10 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; + /* Free the request command so it is not used as response */ + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + if (skb) { struct sock *sk = hci_skb_sk(skb); @@ -38,7 +42,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (sk) sock_put(sk); - hdev->req_skb = skb_get(skb); + hdev->req_rsp = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); @@ -186,8 +190,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = 0; hdev->req_result = 0; - skb = hdev->req_skb; - hdev->req_skb = NULL; + skb = hdev->req_rsp; + hdev->req_rsp = NULL; bt_dev_dbg(hdev, "end: err %d", err); @@ -4879,6 +4883,11 @@ int hci_dev_open_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5040,6 +5049,12 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + /* Drop last request */ + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); -- GitLab From 68644bf5ec6baaff40fc39b3529c874bfda709bd Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 Feb 2024 10:49:26 -0500 Subject: [PATCH 0872/1418] Bluetooth: hci_core: Fix possible buffer overflow [ Upstream commit 81137162bfaa7278785b24c1fd2e9e74f082e8e4 ] struct hci_dev_info has a fixed size name[8] field so in the event that hdev->name is bigger than that strcpy would attempt to write past its size, so this fixes this problem by switching to use strscpy. Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e0c924df13b58..88e9d7e0865a2 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -908,7 +908,7 @@ int hci_get_dev_info(void __user *arg) else flags = hdev->flags; - strcpy(di.name, hdev->name); + strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; -- GitLab From cb8adca52f306563d958a863bb0cbae9c184d1ae Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 1 Mar 2024 12:58:11 -0500 Subject: [PATCH 0873/1418] Bluetooth: af_bluetooth: Fix deadlock [ Upstream commit f7b94bdc1ec107c92262716b073b3e816d4784fb ] Attemting to do sock_lock on .recvmsg may cause a deadlock as shown bellow, so instead of using sock_sock this uses sk_receive_queue.lock on bt_sock_ioctl to avoid the UAF: INFO: task kworker/u9:1:121 blocked for more than 30 seconds. Not tainted 6.7.6-lemon #183 Workqueue: hci0 hci_rx_work Call Trace: __schedule+0x37d/0xa00 schedule+0x32/0xe0 __lock_sock+0x68/0xa0 ? __pfx_autoremove_wake_function+0x10/0x10 lock_sock_nested+0x43/0x50 l2cap_sock_recv_cb+0x21/0xa0 l2cap_recv_frame+0x55b/0x30a0 ? psi_task_switch+0xeb/0x270 ? finish_task_switch.isra.0+0x93/0x2a0 hci_rx_work+0x33a/0x3f0 process_one_work+0x13a/0x2f0 worker_thread+0x2f0/0x410 ? __pfx_worker_thread+0x10/0x10 kthread+0xe0/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 2e07e8348ea4 ("Bluetooth: af_bluetooth: Fix Use-After-Free in bt_sock_recvmsg") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/af_bluetooth.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f1b7510359e4b..3f9ff02baafe3 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -264,14 +264,11 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - lock_sock(sk); - skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - release_sock(sk); return err; } @@ -297,8 +294,6 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - release_sock(sk); - if (flags & MSG_TRUNC) copied = skblen; @@ -521,10 +516,11 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - lock_sock(sk); + spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; - release_sock(sk); + spin_unlock(&sk->sk_receive_queue.lock); + err = put_user(amount, (int __user *)arg); break; -- GitLab From 715264ad09fd4004e347cdb79fa58a4f2344f13f Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 2 Mar 2024 19:06:23 +0200 Subject: [PATCH 0874/1418] Bluetooth: fix use-after-free in accessing skb after sending it [ Upstream commit 947ec0d002dce8577b655793dcc6fc78d67b7cb6 ] hci_send_cmd_sync first sends skb and then tries to clone it. However, the driver may have already freed the skb at that point. Fix by cloning the sent_cmd cloned just above, instead of the original. Log: ================================================================ BUG: KASAN: slab-use-after-free in __copy_skb_header+0x1a/0x240 ... Call Trace: .. __skb_clone+0x59/0x2c0 hci_cmd_work+0x3b3/0x3d0 [bluetooth] process_one_work+0x459/0x900 ... Allocated by task 129: ... __alloc_skb+0x1ae/0x220 __hci_cmd_sync_sk+0x44c/0x7a0 [bluetooth] __hci_cmd_sync_status+0x24/0xb0 [bluetooth] set_cig_params_sync+0x778/0x7d0 [bluetooth] ... Freed by task 0: ... kmem_cache_free+0x157/0x3c0 __usb_hcd_giveback_urb+0x11e/0x1e0 usb_giveback_urb_bh+0x1ad/0x2a0 tasklet_action_common.isra.0+0x259/0x4a0 __do_softirq+0x15b/0x5a7 ================================================================ Fixes: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 88e9d7e0865a2..70f24dc75b596 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4176,7 +4176,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) if (hci_req_status_pend(hdev) && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(skb, GFP_KERNEL); + hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } atomic_dec(&hdev->cmd_cnt); -- GitLab From 9c402819620a842cbfe39359a3ddfaac9adc8384 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 5 Mar 2024 07:59:27 +0000 Subject: [PATCH 0875/1418] sr9800: Add check for usbnet_get_endpoints [ Upstream commit 07161b2416f740a2cb87faa5566873f401440a61 ] Add check for usbnet_get_endpoints() and return the error if it fails in order to transfer the error. Signed-off-by: Chen Ni Reviewed-by: Simon Horman Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Link: https://lore.kernel.org/r/20240305075927.261284-1-nichen@iscas.ac.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/sr9800.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index f5e19f3ef6cdd..4de5144821835 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -737,7 +737,9 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) data->eeprom_len = SR9800_EEPROM_LEN; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* LED Setting Rule : * AABB:CCDD -- GitLab From 98a2feb8ece65f77e80e9b730f60d744f070b6fb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 2 Mar 2024 20:22:09 +0100 Subject: [PATCH 0876/1418] s390/cache: prevent rebuild of shared_cpu_list [ Upstream commit cb0cd4ee11142339f2d47eef6db274290b7a482d ] With commit 36bbc5b4ffab ("cacheinfo: Allow early detection and population of cache attributes") the shared cpu list for each cache level higher than L1 is rebuilt even if the list already has been set up. This is caused by the removal of the cpumask_empty() check within cache_shared_cpu_map_setup(). However architectures can enforce that the shared cpu list is not rebuilt by simply setting cpu_map_populated of the per cpu cache info structure to true, which is also the fix for this problem. Before: $ cat /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list 0-7 After: $ cat /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list 1 Fixes: 36bbc5b4ffab ("cacheinfo: Allow early detection and population of cache attributes") Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 7ee3651d00abe..732024ca005ad 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, pvt, ctype, level, cpu); } } + this_cpu_ci->cpu_map_populated = true; return 0; } -- GitLab From edf7990baa48de5097daa9ac02e06cb4c798a737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:35 +0100 Subject: [PATCH 0877/1418] bpf: Fix DEVMAP_HASH overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 281d464a34f540de166cee74b723e97ac2515ec3 ] The devmap code allocates a number hash buckets equal to the next power of two of the max_entries value provided when creating the map. When rounding up to the next power of two, the 32-bit variable storing the number of buckets can overflow, and the code checks for overflow by checking if the truncated 32-bit value is equal to 0. However, on 32-bit arches the rounding up itself can overflow mid-way through, because it ends up doing a left-shift of 32 bits on an unsigned long value. If the size of an unsigned long is four bytes, this is undefined behaviour, so there is no guarantee that we'll end up with a nice and tidy 0-value at the end. Syzbot managed to turn this into a crash on arm32 by creating a DEVMAP_HASH with max_entries > 0x80000000 and then trying to update it. Fix this by moving the overflow check to before the rounding up operation. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Link: https://lore.kernel.org/r/000000000000ed666a0611af6818@google.com Reported-and-tested-by: syzbot+8cd36f6b65f3cafd400a@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Message-ID: <20240307120340.99577-2-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/devmap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f9a87dcc5535b..e051cbb07dac0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -131,13 +131,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) bpf_map_init_from_attr(&dtab->map, attr); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); - - if (!dtab->n_buckets) /* Overflow check */ + /* hash table size must be power of 2; roundup_pow_of_two() can + * overflow into UB on 32-bit arches, so check that first + */ + if (dtab->map.max_entries > 1UL << 31) return -EINVAL; - } - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) -- GitLab From a83fdaeaea3677b83a53f72ace2d73a19bcd6d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:36 +0100 Subject: [PATCH 0878/1418] bpf: Fix hashtab overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6787d916c2cf9850c97a0a3f73e08c43e7d973b1 ] The hashtab code relies on roundup_pow_of_two() to compute the number of hash buckets, and contains an overflow check by checking if the resulting value is 0. However, on 32-bit arches, the roundup code itself can overflow by doing a 32-bit left-shift of an unsigned long value, which is undefined behaviour, so it is not guaranteed to truncate neatly. This was triggered by syzbot on the DEVMAP_HASH type, which contains the same check, copied from the hashtab code. So apply the same fix to hashtab, by moving the overflow check to before the roundup. Fixes: daaf427c6ab3 ("bpf: fix arraymap NULL deref and missing overflow and zero size checks") Signed-off-by: Toke Høiland-Jørgensen Message-ID: <20240307120340.99577-3-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/hashtab.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 88c71de0a0a95..0c74cc9012d5c 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -495,7 +495,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) num_possible_cpus()); } - /* hash table size must be power of 2 */ + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + err = -E2BIG; + if (htab->map.max_entries > 1UL << 31) + goto free_htab; + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); htab->elem_size = sizeof(struct htab_elem) + @@ -505,10 +511,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) else htab->elem_size += round_up(htab->map.value_size, 8); - err = -E2BIG; - /* prevent zero size kmalloc and check for u32 overflow */ - if (htab->n_buckets == 0 || - htab->n_buckets > U32_MAX / sizeof(struct bucket)) + /* check for u32 overflow */ + if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; err = -ENOMEM; -- GitLab From f06899582ccee09bd85d0696290e3eaca9aa042d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:37 +0100 Subject: [PATCH 0879/1418] bpf: Fix stackmap overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7a4b21250bf79eef26543d35bd390448646c536b ] The stackmap code relies on roundup_pow_of_two() to compute the number of hash buckets, and contains an overflow check by checking if the resulting value is 0. However, on 32-bit arches, the roundup code itself can overflow by doing a 32-bit left-shift of an unsigned long value, which is undefined behaviour, so it is not guaranteed to truncate neatly. This was triggered by syzbot on the DEVMAP_HASH type, which contains the same check, copied from the hashtab code. The commit in the fixes tag actually attempted to fix this, but the fix did not account for the UB, so the fix only works on CPUs where an overflow does result in a neat truncation to zero, which is not guaranteed. Checking the value before rounding does not have this problem. Fixes: 6183f4d3a0a2 ("bpf: Check for integer overflow when using roundup_pow_of_two()") Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Bui Quang Minh Message-ID: <20240307120340.99577-4-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/stackmap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index f86db3cf72123..f0fd936cef319 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -94,11 +94,14 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); - /* hash table size must be power of 2 */ - n_buckets = roundup_pow_of_two(attr->max_entries); - if (!n_buckets) + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + if (attr->max_entries > 1UL << 31) return ERR_PTR(-E2BIG); + n_buckets = roundup_pow_of_two(attr->max_entries); + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) -- GitLab From 586e19c88a0cb58b6ff45ae085b3dd200d862153 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:35 +0800 Subject: [PATCH 0880/1418] iommu/vt-d: Retrieve IOMMU perfmon capability information [ Upstream commit a6a5006dad572a53b5df3f47e1471d207ae9ba49 ] The performance monitoring infrastructure, perfmon, is to support collection of information about key events occurring during operation of the remapping hardware, to aid performance tuning and debug. Each remapping hardware unit has capability registers that indicate support for performance monitoring features and enumerate the capabilities. Add alloc_iommu_pmu() to retrieve IOMMU perfmon capability information for each iommu unit. The information is stored in the iommu->pmu data structure. Capability registers are read-only, so it's safe to prefetch and store them in the pmu structure. This could avoid unnecessary VMEXIT when this code is running in the virtualization environment. Add free_iommu_pmu() to free the saved capability information when freeing the iommu unit. Add a kernel config option for the IOMMU perfmon feature. Unless a user explicitly uses the perf tool to monitor the IOMMU perfmon event, there isn't any impact for the existing IOMMU. Enable it by default. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-3-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Stable-dep-of: 70bad345e622 ("iommu: Fix compilation without CONFIG_IOMMU_INTEL") Signed-off-by: Sasha Levin --- drivers/iommu/intel/Kconfig | 11 +++ drivers/iommu/intel/Makefile | 1 + drivers/iommu/intel/dmar.c | 7 ++ drivers/iommu/intel/iommu.h | 43 ++++++++- drivers/iommu/intel/perfmon.c | 172 ++++++++++++++++++++++++++++++++++ drivers/iommu/intel/perfmon.h | 40 ++++++++ 6 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/intel/perfmon.c create mode 100644 drivers/iommu/intel/perfmon.h diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index b7dff5092fd21..12e1e90fdae13 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -96,4 +96,15 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON passing intel_iommu=sm_on to the kernel. If not sure, please use the default value. +config INTEL_IOMMU_PERF_EVENTS + def_bool y + bool "Intel IOMMU performance events" + depends on INTEL_IOMMU && PERF_EVENTS + help + Selecting this option will enable the performance monitoring + infrastructure in the Intel IOMMU. It collects information about + key events occurring during operation of the remapping hardware, + to aid performance tuning and debug. These are available on modern + processors which support Intel VT-d 4.0 and later. + endif # INTEL_IOMMU diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fa0dae16441cb..7af3b8a4f2a00 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 418af1db0192d..4759f79ad7b94 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -34,6 +34,7 @@ #include "../irq_remapping.h" #include "perf.h" #include "trace.h" +#include "perfmon.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -1104,6 +1105,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (sts & DMA_GSTS_QIES) iommu->gcmd |= DMA_GCMD_QIE; + if (alloc_iommu_pmu(iommu)) + pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id); + raw_spin_lock_init(&iommu->register_lock); /* @@ -1131,6 +1135,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err_sysfs: iommu_device_sysfs_remove(&iommu->iommu); err_unmap: + free_iommu_pmu(iommu); unmap_iommu(iommu); error_free_seq_id: ida_free(&dmar_seq_ids, iommu->seq_id); @@ -1146,6 +1151,8 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_sysfs_remove(&iommu->iommu); } + free_iommu_pmu(iommu); + if (iommu->irq) { if (iommu->pr_irq) { free_irq(iommu->pr_irq, iommu); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index c99cb715bd9a2..c1348bedab3b3 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -125,6 +125,11 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_PERFCAP_REG 0x300 +#define DMAR_PERFCFGOFF_REG 0x310 +#define DMAR_PERFOVFOFF_REG 0x318 +#define DMAR_PERFCNTROFF_REG 0x31c +#define DMAR_PERFEVNTCAP_REG 0x380 #define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ #define DMAR_VCMD_REG 0xe00 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ @@ -148,6 +153,7 @@ */ #define cap_esrtps(c) (((c) >> 63) & 1) #define cap_esirtps(c) (((c) >> 62) & 1) +#define cap_ecmds(c) (((c) >> 61) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) @@ -179,7 +185,8 @@ * Extended Capability Register */ -#define ecap_rps(e) (((e) >> 49) & 0x1) +#define ecap_pms(e) (((e) >> 51) & 0x1) +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) @@ -210,6 +217,22 @@ #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) #define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ +/* + * Decoding Perf Capability Register + */ +#define pcap_num_cntr(p) ((p) & 0xffff) +#define pcap_cntr_width(p) (((p) >> 16) & 0x7f) +#define pcap_num_event_group(p) (((p) >> 24) & 0x1f) +#define pcap_filters_mask(p) (((p) >> 32) & 0x1f) +#define pcap_interrupt(p) (((p) >> 50) & 0x1) +/* The counter stride is calculated as 2 ^ (x+10) bytes */ +#define pcap_cntr_stride(p) (1ULL << ((((p) >> 52) & 0x7) + 10)) + +/* + * Decoding Perf Event Capability Register + */ +#define pecap_es(p) ((p) & 0xfffffff) + /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ @@ -561,6 +584,22 @@ struct dmar_domain { iommu core */ }; +struct iommu_pmu { + struct intel_iommu *iommu; + u32 num_cntr; /* Number of counters */ + u32 num_eg; /* Number of event group */ + u32 cntr_width; /* Counter width */ + u32 cntr_stride; /* Counter Stride */ + u32 filter; /* Bitmask of filter support */ + void __iomem *base; /* the PerfMon base address */ + void __iomem *cfg_reg; /* counter configuration base address */ + void __iomem *cntr_reg; /* counter 0 address*/ + void __iomem *overflow; /* overflow status register */ + + u64 *evcap; /* Indicates all supported events */ + u32 **cntr_evcap; /* Supported events of each counter. */ +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -608,6 +647,8 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; void *perf_statistic; + + struct iommu_pmu *pmu; }; /* PCI domain-device relationship */ diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c new file mode 100644 index 0000000000000..db5791a544551 --- /dev/null +++ b/drivers/iommu/intel/perfmon.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support Intel IOMMU PerfMon + * Copyright(c) 2023 Intel Corporation. + */ +#define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include "iommu.h" +#include "perfmon.h" + +static inline void __iomem * +get_perf_reg_address(struct intel_iommu *iommu, u32 offset) +{ + u32 off = dmar_readl(iommu->reg + offset); + + return iommu->reg + off; +} + +int alloc_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu; + int i, j, ret; + u64 perfcap; + u32 cap; + + if (!ecap_pms(iommu->ecap)) + return 0; + + /* The IOMMU PMU requires the ECMD support as well */ + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); + /* The performance monitoring is not supported. */ + if (!perfcap) + return -ENODEV; + + /* Sanity check for the number of the counters and event groups */ + if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) + return -ENODEV; + + /* The interrupt on overflow is required */ + if (!pcap_interrupt(perfcap)) + return -ENODEV; + + iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); + if (!iommu_pmu) + return -ENOMEM; + + iommu_pmu->num_cntr = pcap_num_cntr(perfcap); + iommu_pmu->cntr_width = pcap_cntr_width(perfcap); + iommu_pmu->filter = pcap_filters_mask(perfcap); + iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); + iommu_pmu->num_eg = pcap_num_event_group(perfcap); + + iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL); + if (!iommu_pmu->evcap) { + ret = -ENOMEM; + goto free_pmu; + } + + /* Parse event group capabilities */ + for (i = 0; i < iommu_pmu->num_eg; i++) { + u64 pcap; + + pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + + i * IOMMU_PMU_CAP_REGS_STEP); + iommu_pmu->evcap[i] = pecap_es(pcap); + } + + iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap) { + ret = -ENOMEM; + goto free_pmu_evcap; + } + for (i = 0; i < iommu_pmu->num_cntr; i++) { + iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap[i]) { + ret = -ENOMEM; + goto free_pmu_cntr_evcap; + } + /* + * Set to the global capabilities, will adjust according + * to per-counter capabilities later. + */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; + } + + iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); + iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); + iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); + + /* + * Check per-counter capabilities. All counters should have the + * same capabilities on Interrupt on Overflow Support and Counter + * Width. + */ + for (i = 0; i < iommu_pmu->num_cntr; i++) { + cap = dmar_readl(iommu_pmu->cfg_reg + + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTRCAP_OFFSET); + if (!iommu_cntrcap_pcc(cap)) + continue; + + /* + * It's possible that some counters have a different + * capability because of e.g., HW bug. Check the corner + * case here and simply drop those counters. + */ + if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || + !iommu_cntrcap_ios(cap)) { + iommu_pmu->num_cntr = i; + pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n", + iommu_pmu->num_cntr); + } + + /* Clear the pre-defined events group */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = 0; + + /* Override with per-counter event capabilities */ + for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { + cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTREVCAP_OFFSET + + (j * IOMMU_PMU_OFF_REGS_STEP)); + iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); + /* + * Some events may only be supported by a specific counter. + * Track them in the evcap as well. + */ + iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); + } + } + + iommu_pmu->iommu = iommu; + iommu->pmu = iommu_pmu; + + return 0; + +free_pmu_cntr_evcap: + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); +free_pmu_evcap: + kfree(iommu_pmu->evcap); +free_pmu: + kfree(iommu_pmu); + + return ret; +} + +void free_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + if (iommu_pmu->evcap) { + int i; + + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); + } + kfree(iommu_pmu->evcap); + kfree(iommu_pmu); + iommu->pmu = NULL; +} diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h new file mode 100644 index 0000000000000..4b0d9c1fea6ff --- /dev/null +++ b/drivers/iommu/intel/perfmon.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * PERFCFGOFF_REG, PERFFRZOFF_REG + * PERFOVFOFF_REG, PERFCNTROFF_REG + */ +#define IOMMU_PMU_NUM_OFF_REGS 4 +#define IOMMU_PMU_OFF_REGS_STEP 4 + +#define IOMMU_PMU_CFG_OFFSET 0x100 +#define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 +#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 +#define IOMMU_PMU_CFG_SIZE 0x8 +#define IOMMU_PMU_CFG_FILTERS_OFFSET 0x4 + +#define IOMMU_PMU_CAP_REGS_STEP 8 + +#define iommu_cntrcap_pcc(p) ((p) & 0x1) +#define iommu_cntrcap_cw(p) (((p) >> 8) & 0xff) +#define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) +#define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) + +#define iommu_event_select(p) ((p) & 0xfffffff) +#define iommu_event_group(p) (((p) >> 28) & 0xf) + +#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS +int alloc_iommu_pmu(struct intel_iommu *iommu); +void free_iommu_pmu(struct intel_iommu *iommu); +#else +static inline int +alloc_iommu_pmu(struct intel_iommu *iommu) +{ + return 0; +} + +static inline void +free_iommu_pmu(struct intel_iommu *iommu) +{ +} +#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */ -- GitLab From 8c91a4bfc7f8f42e228ba91eb37d0d33c3450311 Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Thu, 7 Mar 2024 20:44:19 +0100 Subject: [PATCH 0881/1418] iommu: Fix compilation without CONFIG_IOMMU_INTEL [ Upstream commit 70bad345e622c23bb530016925c936ab04a646ac ] When the kernel is comiled with CONFIG_IRQ_REMAP=y but without CONFIG_IOMMU_INTEL compilation fails since commit def054b01a8678 with an undefined reference to device_rbtree_find(). This patch makes sure that intel specific code is only compiled with CONFIG_IOMMU_INTEL=y. Signed-off-by: Bert Karwatzki Fixes: 80a9b50c0b9e ("iommu/vt-d: Improve ITE fault handling if target device isn't present") Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240307194419.15801-1-spasswolf@web.de Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/Kconfig | 2 +- drivers/iommu/intel/Makefile | 2 ++ drivers/iommu/irq_remapping.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dc5f7a156ff5e..dc19e7fb07cfe 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -192,7 +192,7 @@ source "drivers/iommu/intel/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE + select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index 7af3b8a4f2a00..29d26a4371327 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -5,5 +5,7 @@ obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o +ifdef CONFIG_INTEL_IOMMU obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +endif obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 83314b9d8f38b..ee59647c20501 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -99,7 +99,8 @@ int __init irq_remapping_prepare(void) if (disable_irq_remap) return -ENOSYS; - if (intel_irq_remap_ops.prepare() == 0) + if (IS_ENABLED(CONFIG_INTEL_IOMMU) && + intel_irq_remap_ops.prepare() == 0) remap_ops = &intel_irq_remap_ops; else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) -- GitLab From 66e74f2f74a6930a93ef9fa581bf3ff77ba41692 Mon Sep 17 00:00:00 2001 From: Shiming Cheng Date: Thu, 7 Mar 2024 18:01:57 +0800 Subject: [PATCH 0882/1418] ipv6: fib6_rules: flush route cache when rule is changed [ Upstream commit c4386ab4f6c600f75fdfd21143f89bac3e625d0d ] When rule policy is changed, ipv6 socket cache is not refreshed. The sock's skb still uses a outdated route cache and was sent to a wrong interface. To avoid this error we should update fib node's version when rule is changed. Then skb's route will be reroute checked as route cache version is already different with fib node version. The route cache is refreshed to match the latest rule. Fixes: 101367c2f8c4 ("[IPV6]: Policy Routing Rules") Signed-off-by: Shiming Cheng Signed-off-by: Lena Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/fib6_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7c20038330104..be52b18e08a6b 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -449,6 +449,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } +static void fib6_rule_flush_cache(struct fib_rules_ops *ops) +{ + rt_genid_bump_ipv6(ops->fro_net); +} + static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), @@ -461,6 +466,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, + .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, -- GitLab From 60044ab84836359534bd7153b92e9c1584140e4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 10:07:16 +0000 Subject: [PATCH 0883/1418] net: ip_tunnel: make sure to pull inner header in ip_tunnel_rcv() [ Upstream commit b0ec2abf98267f14d032102551581c833b0659d3 ] Apply the same fix than ones found in : 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. syzbot reported: BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __ipgre_rcv+0x9bc/0xbc0 net/ipv4/ip_gre.c:389 ipgre_rcv net/ipv4/ip_gre.c:411 [inline] gre_rcv+0x423/0x19f0 net/ipv4/ip_gre.c:447 gre_rcv+0x2a4/0x390 net/ipv4/gre_demux.c:163 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 netif_receive_skb_internal net/core/dev.c:5734 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5793 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1556 tun_get_user+0x53b9/0x66e0 drivers/net/tun.c:2009 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590 alloc_pages_mpol+0x62b/0x9d0 mm/mempolicy.c:2133 alloc_pages+0x1be/0x1e0 mm/mempolicy.c:2204 skb_page_frag_refill+0x2bf/0x7c0 net/core/sock.c:2909 tun_build_skb drivers/net/tun.c:1686 [inline] tun_get_user+0xe0a/0x66e0 drivers/net/tun.c:1826 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_tunnel.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 328f9068c6a43..3445e576b05bc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -364,7 +364,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, bool log_ecn_error) { const struct iphdr *iph = ip_hdr(skb); - int err; + int nh, err; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -390,8 +390,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + iph = (struct iphdr *)(skb->head + nh); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) -- GitLab From 2a2ff709511617de9c6c072eeee82bcbbdfecaf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20L=27h=C3=B4pital?= Date: Thu, 7 Mar 2024 12:19:06 +0100 Subject: [PATCH 0884/1418] net: phy: fix phy_get_internal_delay accessing an empty array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4469c0c5b14a0919f5965c7ceac96b523eb57b79 ] The phy_get_internal_delay function could try to access to an empty array in the case that the driver is calling phy_get_internal_delay without defining delay_values and rx-internal-delay-ps or tx-internal-delay-ps is defined to 0 in the device-tree. This will lead to "unable to handle kernel NULL pointer dereference at virtual address 0". To avoid this kernel oops, the test should be delay >= 0. As there is already delay < 0 test just before, the test could only be size == 0. Fixes: 92252eec913b ("net: phy: Add a helper to return the index for of the internal delay") Co-developed-by: Enguerrand de Ribaucourt Signed-off-by: Enguerrand de Ribaucourt Signed-off-by: Kévin L'hôpital Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 944f76e6fc8eb..45b07004669d6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2893,7 +2893,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, if (delay < 0) return delay; - if (delay && size == 0) + if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { -- GitLab From 19af2ce84cd9a4d0c760fa913716393520ef8be4 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 7 Mar 2024 09:01:08 +0800 Subject: [PATCH 0885/1418] net: hns3: fix wrong judgment condition issue [ Upstream commit 07a1d6dc90baedcf5d713e2b003b9e387130ee30 ] In hns3_dcbnl_ieee_delapp, should check ieee_delapp not ieee_setapp. This path fix the wrong judgment. Fixes: 0ba22bcb222d ("net: hns3: add support config dscp map to tc") Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c index 3b6dbf158b98d..f72dc0cee30e5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c @@ -76,7 +76,7 @@ static int hns3_dcbnl_ieee_delapp(struct net_device *ndev, struct dcb_app *app) if (hns3_nic_resetting(ndev)) return -EBUSY; - if (h->kinfo.dcb_ops->ieee_setapp) + if (h->kinfo.dcb_ops->ieee_delapp) return h->kinfo.dcb_ops->ieee_delapp(h, app); return -EOPNOTSUPP; -- GitLab From b3cf70472a600bcb2efe24906bc9bc6014d4c6f6 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 7 Mar 2024 09:01:11 +0800 Subject: [PATCH 0886/1418] net: hns3: fix kernel crash when 1588 is received on HIP08 devices [ Upstream commit 0fbcf2366ba9888cf02eda23e35fde7f7fcc07c3 ] The HIP08 devices does not register the ptp devices, so the hdev->ptp is NULL, but the hardware can receive 1588 messages, and set the HNS3_RXD_TS_VLD_B bit, so, if match this case, the access of hdev->ptp->flags will cause a kernel crash: [ 5888.946472] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000018 [ 5888.946475] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000018 ... [ 5889.266118] pc : hclge_ptp_get_rx_hwts+0x40/0x170 [hclge] [ 5889.272612] lr : hclge_ptp_get_rx_hwts+0x34/0x170 [hclge] [ 5889.279101] sp : ffff800012c3bc50 [ 5889.283516] x29: ffff800012c3bc50 x28: ffff2040002be040 [ 5889.289927] x27: ffff800009116484 x26: 0000000080007500 [ 5889.296333] x25: 0000000000000000 x24: ffff204001c6f000 [ 5889.302738] x23: ffff204144f53c00 x22: 0000000000000000 [ 5889.309134] x21: 0000000000000000 x20: ffff204004220080 [ 5889.315520] x19: ffff204144f53c00 x18: 0000000000000000 [ 5889.321897] x17: 0000000000000000 x16: 0000000000000000 [ 5889.328263] x15: 0000004000140ec8 x14: 0000000000000000 [ 5889.334617] x13: 0000000000000000 x12: 00000000010011df [ 5889.340965] x11: bbfeff4d22000000 x10: 0000000000000000 [ 5889.347303] x9 : ffff800009402124 x8 : 0200f78811dfbb4d [ 5889.353637] x7 : 2200000000191b01 x6 : ffff208002a7d480 [ 5889.359959] x5 : 0000000000000000 x4 : 0000000000000000 [ 5889.366271] x3 : 0000000000000000 x2 : 0000000000000000 [ 5889.372567] x1 : 0000000000000000 x0 : ffff20400095c080 [ 5889.378857] Call trace: [ 5889.382285] hclge_ptp_get_rx_hwts+0x40/0x170 [hclge] [ 5889.388304] hns3_handle_bdinfo+0x324/0x410 [hns3] [ 5889.394055] hns3_handle_rx_bd+0x60/0x150 [hns3] [ 5889.399624] hns3_clean_rx_ring+0x84/0x170 [hns3] [ 5889.405270] hns3_nic_common_poll+0xa8/0x220 [hns3] [ 5889.411084] napi_poll+0xcc/0x264 [ 5889.415329] net_rx_action+0xd4/0x21c [ 5889.419911] __do_softirq+0x130/0x358 [ 5889.424484] irq_exit+0x134/0x154 [ 5889.428700] __handle_domain_irq+0x88/0xf0 [ 5889.433684] gic_handle_irq+0x78/0x2c0 [ 5889.438319] el1_irq+0xb8/0x140 [ 5889.442354] arch_cpu_idle+0x18/0x40 [ 5889.446816] default_idle_call+0x5c/0x1c0 [ 5889.451714] cpuidle_idle_call+0x174/0x1b0 [ 5889.456692] do_idle+0xc8/0x160 [ 5889.460717] cpu_startup_entry+0x30/0xfc [ 5889.465523] secondary_start_kernel+0x158/0x1ec [ 5889.470936] Code: 97ffab78 f9411c14 91408294 f9457284 (f9400c80) [ 5889.477950] SMP: stopping secondary CPUs [ 5890.514626] SMP: failed to stop secondary CPUs 0-69,71-95 [ 5890.522951] Starting crashdump kernel... Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index a40b1583f1149..0f06f95b09bc2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -120,7 +120,7 @@ void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb, u64 ns = nsec; u32 sec_h; - if (!test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags)) + if (!hdev->ptp || !test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags)) return; /* Since the BD does not have enough space for the higher 16 bits of -- GitLab From 8ec278830c1a13776f49f70041ad9cd1146a2af0 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Thu, 7 Mar 2024 09:01:14 +0800 Subject: [PATCH 0887/1418] net: hns3: fix port duplex configure error in IMP reset [ Upstream commit 11d80f79dd9f871a52feba4bf24b5ac39f448eb7 ] Currently, the mac port is fixed to configured as full dplex mode in hclge_mac_init() when driver initialization or reset restore. Users may change the mode to half duplex with ethtool, so it may cause the user configuration dropped after reset. To fix it, don't change the duplex mode when resetting. Fixes: 2d03eacc0b7e ("net: hns3: Only update mac configuation when necessary") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 48b0cb5ec5d29..27037ce795902 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2990,7 +2990,10 @@ static int hclge_mac_init(struct hclge_dev *hdev) int ret; hdev->support_sfp_query = true; - hdev->hw.mac.duplex = HCLGE_MAC_FULL; + + if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + hdev->hw.mac.duplex = HCLGE_MAC_FULL; + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, hdev->hw.mac.duplex, hdev->hw.mac.lane_num); if (ret) -- GitLab From f30e6322bc518ba6a9428bb3851223f09870001e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Jul 2023 12:06:32 -0700 Subject: [PATCH 0888/1418] Bluetooth: MGMT: Fix always using HCI_MAX_AD_LENGTH [ Upstream commit 112b5090c21905531314fee41f691f0317bbf4f6 ] HCI_MAX_AD_LENGTH shall only be used if the controller doesn't support extended advertising, otherwise HCI_MAX_EXT_AD_LENGTH shall be used instead. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2ab3e8d67fc1 ("Bluetooth: Fix eir name length") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_event.c | 12 +++++++----- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6bc6de5345261..53155cb703b5d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1710,6 +1710,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) +/* Maximum advertising length */ +#define max_adv_len(dev) \ + (ext_adv_capable(dev) ? HCI_MAX_EXT_AD_LENGTH : HCI_MAX_AD_LENGTH) + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: * * C24: Mandatory if the LE Controller supports Connection State and either diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6b746ab9f6d21..b150dee88f35c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1761,7 +1761,7 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct discovery_state *d = &hdev->discovery; - if (len > HCI_MAX_AD_LENGTH) + if (len > max_adv_len(hdev)) return; bacpy(&d->last_adv_addr, bdaddr); @@ -6240,8 +6240,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, return; } - if (!ext_adv && len > HCI_MAX_AD_LENGTH) { - bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes"); + if (len > max_adv_len(hdev)) { + bt_dev_err_ratelimited(hdev, + "adv larger than maximum supported"); return; } @@ -6306,7 +6307,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, type); - if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) { + if (!ext_adv && conn && type == LE_ADV_IND && + len <= max_adv_len(hdev)) { /* Store report for later inclusion by * mgmt_device_connected */ @@ -6447,7 +6449,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, info->length + 1)) break; - if (info->length <= HCI_MAX_AD_LENGTH) { + if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, NULL, 0, rssi, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4a35535f56607..1486fb9bb78f7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8436,8 +8436,8 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, supported_flags = get_supported_adv_flags(hdev); rp->supported_flags = cpu_to_le32(supported_flags); - rp->max_adv_data_len = HCI_MAX_AD_LENGTH; - rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; + rp->max_adv_data_len = max_adv_len(hdev); + rp->max_scan_rsp_len = max_adv_len(hdev); rp->max_instances = hdev->le_num_of_adv_sets; rp->num_instances = hdev->adv_instance_cnt; @@ -8473,7 +8473,7 @@ static u8 calculate_name_len(struct hci_dev *hdev) static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, bool is_adv_data) { - u8 max_len = HCI_MAX_AD_LENGTH; + u8 max_len = max_adv_len(hdev); if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | -- GitLab From 99f30e12e588f9982a6eb1916e53510bff25b3b8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 18 Aug 2023 14:19:27 -0700 Subject: [PATCH 0889/1418] Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH [ Upstream commit db08722fc7d46168fe31d9b8a7b29229dd959f9f ] There a few instances still using HCI_MAX_AD_LENGTH instead of using max_adv_len which takes care of detecting what is the actual maximum length depending on if the controller supports EA or not. Fixes: 112b5090c219 ("Bluetooth: MGMT: Fix always using HCI_MAX_AD_LENGTH") Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2ab3e8d67fc1 ("Bluetooth: Fix eir name length") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 6 +++--- net/bluetooth/eir.c | 2 +- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 53155cb703b5d..c50a41f1782a4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -81,7 +81,7 @@ struct discovery_state { u8 last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; - u8 last_adv_data[HCI_MAX_AD_LENGTH]; + u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; @@ -293,7 +293,7 @@ struct adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[HCI_MAX_AD_LENGTH]; + __u8 value[HCI_MAX_EXT_AD_LENGTH]; }; struct adv_rssi_thresholds { @@ -727,7 +727,7 @@ struct hci_conn { __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; - __u8 le_adv_data[HCI_MAX_AD_LENGTH]; + __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 le_adv_data_len; __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; __u8 le_per_adv_data_len; diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 8a85f6cdfbc16..9214189279e80 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -33,7 +33,7 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) size_t complete_len; /* no space left for name (+ NULL + type + len) */ - if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) return ad_len; /* use complete name if present and fits */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1486fb9bb78f7..21c0924787e22 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5378,9 +5378,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_AD_LENGTH || - length > HCI_MAX_AD_LENGTH || - (offset + length) > HCI_MAX_AD_LENGTH) + if (offset >= HCI_MAX_EXT_AD_LENGTH || + length > HCI_MAX_EXT_AD_LENGTH || + (offset + length) > HCI_MAX_EXT_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); -- GitLab From 262a77d85e8b448ac83eb4e882e0f63056b19f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Thu, 7 Mar 2024 17:42:05 +0100 Subject: [PATCH 0890/1418] Bluetooth: Fix eir name length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2ab3e8d67fc1d4a7638b769cf83023ec209fc0a9 ] According to Section 1.2 of Core Specification Supplement Part A the complete or short name strings are defined as utf8s, which should not include the trailing NULL for variable length array as defined in Core Specification Vol1 Part E Section 2.9.3. Removing the trailing NULL allows PTS to retrieve the random address based on device name, e.g. for SM/PER/KDU/BV-02-C, SM/PER/KDU/BV-08-C or GAP/BROB/BCST/BV-03-C. Fixes: f61851f64b17 ("Bluetooth: Fix append max 11 bytes of name to scan rsp data") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/eir.c | 29 +++++++---------------------- net/bluetooth/mgmt.c | 2 +- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 9214189279e80..1bc51e2b05a34 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -13,48 +13,33 @@ #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) -{ - u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; - - /* If data is already NULL terminated just pass it directly */ - if (data[data_len - 1] == '\0') - return eir_append_data(eir, eir_len, type, data, data_len); - - memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH); - name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; - - return eir_append_data(eir, eir_len, type, name, sizeof(name)); -} - u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { size_t short_len; size_t complete_len; - /* no space left for name (+ NULL + type + len) */ - if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + /* no space left for name (+ type + len) */ + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 2) return ad_len; /* use complete name if present and fits */ complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len + 1); + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); /* use short name if present */ short_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); if (short_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->short_name, - short_len == HCI_MAX_SHORT_NAME_LENGTH ? - short_len : short_len + 1); + short_len); /* use shortened full name if present, we already know that name * is longer then HCI_MAX_SHORT_NAME_LENGTH */ if (complete_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 21c0924787e22..716f6dc4934b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8465,7 +8465,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, static u8 calculate_name_len(struct hci_dev *hdev) { - u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */ return eir_append_local_name(hdev, buf, 0); } -- GitLab From 877cce250a7b55588318bedc4b0271fddaf28999 Mon Sep 17 00:00:00 2001 From: Tim Pambor Date: Tue, 5 Mar 2024 12:06:08 +0100 Subject: [PATCH 0891/1418] net: phy: dp83822: Fix RGMII TX delay configuration [ Upstream commit c8a5c731fd1223090af57da33838c671a7fc6a78 ] The logic for enabling the TX clock shift is inverse of enabling the RX clock shift. The TX clock shift is disabled when DP83822_TX_CLK_SHIFT is set. Correct the current behavior and always write the delay configuration to ensure consistent delay settings regardless of bootloader configuration. Reference: https://www.ti.com/lit/ds/symlink/dp83822i.pdf p. 69 Fixes: 8095295292b5 ("net: phy: DP83822: Add setting the fixed internal delay") Signed-off-by: Tim Pambor Link: https://lore.kernel.org/r/20240305110608.104072-1-tp@osasysteme.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/dp83822.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 267e6fd3d4448..57411ee1d8374 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -380,7 +380,7 @@ static int dp83822_config_init(struct phy_device *phydev) { struct dp83822_private *dp83822 = phydev->priv; struct device *dev = &phydev->mdio.dev; - int rgmii_delay; + int rgmii_delay = 0; s32 rx_int_delay; s32 tx_int_delay; int err = 0; @@ -390,30 +390,33 @@ static int dp83822_config_init(struct phy_device *phydev) rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, true); - if (rx_int_delay <= 0) - rgmii_delay = 0; - else - rgmii_delay = DP83822_RX_CLK_SHIFT; + /* Set DP83822_RX_CLK_SHIFT to enable rx clk internal delay */ + if (rx_int_delay > 0) + rgmii_delay |= DP83822_RX_CLK_SHIFT; tx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, false); + + /* Set DP83822_TX_CLK_SHIFT to disable tx clk internal delay */ if (tx_int_delay <= 0) - rgmii_delay &= ~DP83822_TX_CLK_SHIFT; - else rgmii_delay |= DP83822_TX_CLK_SHIFT; - if (rgmii_delay) { - err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, rgmii_delay); - if (err) - return err; - } + err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay); + if (err) + return err; + + err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); - phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + if (err) + return err; } else { - phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + + if (err) + return err; } if (dp83822->fx_enabled) { -- GitLab From 20fd74fa884f30515b06f725e9207b064e1ea97b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Mar 2024 16:48:28 +0530 Subject: [PATCH 0892/1418] OPP: debugfs: Fix warning around icc_get_name() [ Upstream commit 28330ceb953e39880ea77da4895bb902a1244860 ] If the kernel isn't built with interconnect support, icc_get_name() returns NULL and we get following warning: drivers/opp/debugfs.c: In function 'bw_name_read': drivers/opp/debugfs.c:43:42: error: '%.62s' directive argument is null [-Werror=format-overflow=] i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); Fix it. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402141313.81ltVF5g-lkp@intel.com/ Fixes: 0430b1d5704b0 ("opp: Expose bandwidth information via debugfs") Signed-off-by: Viresh Kumar Reviewed-by: Dhruva Gole Signed-off-by: Sasha Levin --- drivers/opp/debugfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 2c7fb683441ef..de81bbf4be100 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -37,10 +37,12 @@ static ssize_t bw_name_read(struct file *fp, char __user *userbuf, size_t count, loff_t *ppos) { struct icc_path *path = fp->private_data; + const char *name = icc_get_name(path); char buf[64]; - int i; + int i = 0; - i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); + if (name) + i = scnprintf(buf, sizeof(buf), "%.62s\n", name); return simple_read_from_buffer(userbuf, count, ppos, buf, i); } -- GitLab From dcdcf9a71730b35ba0096be166149e160fa743d0 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:49 +0000 Subject: [PATCH 0893/1418] tcp: fix incorrect parameter validation in the do_tcp_getsockopt() function [ Upstream commit 716edc9706deb3bb2ff56e2eeb83559cea8f22db ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Reviewed-by: Jason Xing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 86e7695d91adf..5a165e29f7be4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4102,11 +4102,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; -- GitLab From 6ccbd0227fb81b8a22b77795fd6ab2ff0d24db7a Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0894/1418] ipmr: fix incorrect parameter validation in the ip_mroute_getsockopt() function [ Upstream commit 5c3be3e0eb44b7f978bb6cbb20ad956adb93f736 ] The 'olr' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'olr' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ipmr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d5421c38c2aae..3ed9ed2bffd29 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1581,9 +1581,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; + + olr = min_t(unsigned int, olr, sizeof(int)); + if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) -- GitLab From cfea1c9ad406f39a1c416ec981107bf5ce09707c Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0895/1418] l2tp: fix incorrect parameter validation in the pppol2tp_getsockopt() function [ Upstream commit 955e9876ba4ee26eeaab1b13517f5b2c88e73d55 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reviewed-by: Tom Parkin Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_ppp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f011af6601c9c..6146e4e67bbb5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1356,11 +1356,11 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + err = -ENOTCONN; if (!sk->sk_user_data) goto end; -- GitLab From 6bb3c0473863f3c4f6430799fa45423a5b2ed75e Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0896/1418] udp: fix incorrect parameter validation in the udp_lib_getsockopt() function [ Upstream commit 4bb3ba7b74fceec6f558745b25a43c6521cf5506 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Willem de Bruijn Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 87d759bab0012..7856b7a3e0ee9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2790,11 +2790,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case UDP_CORK: val = udp_test_bit(CORK, sk); -- GitLab From 440e278cb53b8dd6627c32e84950350083c39d35 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0897/1418] net: kcm: fix incorrect parameter validation in the kcm_getsockopt) function [ Upstream commit 3ed5f415133f9b7518fbe55ba9ae9a3f5e700929 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/kcm/kcmsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 65845c59c0655..7d37bf4334d26 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1274,10 +1274,11 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case KCM_RECV_DISABLE: val = kcm->rx_disabled; -- GitLab From b7c2b7eae2864b84644e535635fd88b0bcce0065 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0898/1418] net/x25: fix incorrect parameter validation in the x25_getsockopt() function [ Upstream commit d6eb8de2015f0c24822e47356f839167ebde2945 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/x25/af_x25.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5c7ad301d742e..5a8b2ea56564e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -460,12 +460,12 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) goto out; - len = min_t(unsigned int, len, sizeof(int)); - rc = -EINVAL; if (len < 0) goto out; + len = min_t(unsigned int, len, sizeof(int)); + rc = -EFAULT; if (put_user(len, optlen)) goto out; -- GitLab From c9b4e220dd18f79507803f38a55d53b483f6c9c3 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 8 Mar 2024 22:25:40 +0800 Subject: [PATCH 0899/1418] nfp: flower: handle acti_netdevs allocation failure [ Upstream commit 84e95149bd341705f0eca6a7fcb955c548805002 ] The kmalloc_array() in nfp_fl_lag_do_work() will return null, if the physical memory has run out. As a result, if we dereference the acti_netdevs, the null pointer dereference bugs will happen. This patch adds a check to judge whether allocation failure occurs. If it happens, the delayed work will be rescheduled and try again. Fixes: bb9a8d031140 ("nfp: flower: monitor and offload LAG groups") Signed-off-by: Duoming Zhou Reviewed-by: Louis Peens Link: https://lore.kernel.org/r/20240308142540.9674-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/flower/lag_conf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index e92860e20a24a..c6a2c302a8c8b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -308,6 +308,11 @@ static void nfp_fl_lag_do_work(struct work_struct *work) acti_netdevs = kmalloc_array(entry->slave_cnt, sizeof(*acti_netdevs), GFP_KERNEL); + if (!acti_netdevs) { + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } /* Include sanity check in the loop. It may be that a bond has * changed between processing the last notification and the -- GitLab From b605c3831fb47731a55a1e50e919c45c308f96e0 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 11 Mar 2024 12:27:22 +0000 Subject: [PATCH 0900/1418] bpf: hardcode BPF_PROG_PACK_SIZE to 2MB * num_possible_nodes() [ Upstream commit d6170e4aaf86424c24ce06e355b4573daa891b17 ] On some architectures like ARM64, PMD_SIZE can be really large in some configurations. Like with CONFIG_ARM64_64K_PAGES=y the PMD_SIZE is 512MB. Use 2MB * num_possible_nodes() as the size for allocations done through the prog pack allocator. On most architectures, PMD_SIZE will be equal to 2MB in case of 4KB pages and will be greater than 2MB for bigger page sizes. Fixes: ea2babac63d4 ("bpf: Simplify bpf_prog_pack_[size|mask]") Reported-by: "kernelci.org bot" Closes: https://lore.kernel.org/all/7e216c88-77ee-47b8-becc-a0f780868d3c@sirena.org.uk/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403092219.dhgcuz2G-lkp@intel.com/ Suggested-by: Song Liu Signed-off-by: Puranjay Mohan Message-ID: <20240311122722.86232-1-puranjay12@gmail.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 76bf1de261152..44abf88e1bb0d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -857,7 +857,12 @@ static LIST_HEAD(pack_list); * CONFIG_MMU=n. Use PAGE_SIZE in these cases. */ #ifdef PMD_SIZE -#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes()) +/* PMD_SIZE is really big for some archs. It doesn't make sense to + * reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to + * 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be + * greater than or equal to 2MB. + */ +#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes()) #else #define BPF_PROG_PACK_SIZE PAGE_SIZE #endif -- GitLab From 2b50b4f0d40dee8181148a39ca5bb5079a35777c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 11 Mar 2024 13:42:55 -0400 Subject: [PATCH 0901/1418] dm raid: fix false positive for requeue needed during reshape [ Upstream commit b25b8f4b8ecef0f48c05f0c3572daeabefe16526 ] An empty flush doesn't have a payload, so it should never be looked at when considering to possibly requeue a bio for the case when a reshape is in progress. Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") Reported-by: Patrick Plenefisch Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 4b7528dc2fd08..7fbce214e00f5 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3325,14 +3325,14 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct mddev *mddev = &rs->md; /* - * If we're reshaping to add disk(s)), ti->len and + * If we're reshaping to add disk(s), ti->len and * mddev->array_sectors will differ during the process * (ti->len > mddev->array_sectors), so we have to requeue * bios with addresses > mddev->array_sectors here or * there will occur accesses past EOD of the component * data images thus erroring the raid set. */ - if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; md_handle_request(mddev, bio); -- GitLab From ad10289f68f45649816cc68eb93f45fd5ec48a15 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 11 Mar 2024 15:06:39 +0100 Subject: [PATCH 0902/1418] dm: call the resume method on internal suspend [ Upstream commit 65e8fbde64520001abf1c8d0e573561b4746ef38 ] There is this reported crash when experimenting with the lvm2 testsuite. The list corruption is caused by the fact that the postsuspend and resume methods were not paired correctly; there were two consecutive calls to the origin_postsuspend function. The second call attempts to remove the "hash_list" entry from a list, while it was already removed by the first call. Fix __dm_internal_resume so that it calls the preresume and resume methods of the table's targets. If a preresume method of some target fails, we are in a tricky situation. We can't return an error because dm_internal_resume isn't supposed to return errors. We can't return success, because then the "resume" and "postsuspend" methods would not be paired correctly. So, we set the DMF_SUSPENDED flag and we fake normal suspend - it may confuse userspace tools, but it won't cause a kernel crash. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 8343 Comm: dmsetup Not tainted 6.8.0-rc6 #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:__list_del_entry_valid_or_report+0x77/0xc0 RSP: 0018:ffff8881b831bcc0 EFLAGS: 00010282 RAX: 000000000000004e RBX: ffff888143b6eb80 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff819053d0 RDI: 00000000ffffffff RBP: ffff8881b83a3400 R08: 00000000fffeffff R09: 0000000000000058 R10: 0000000000000000 R11: ffffffff81a24080 R12: 0000000000000001 R13: ffff88814538e000 R14: ffff888143bc6dc0 R15: ffffffffa02e4bb0 FS: 00000000f7c0f780(0000) GS:ffff8893f0a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000057fb5000 CR3: 0000000143474000 CR4: 00000000000006b0 Call Trace: ? die+0x2d/0x80 ? do_trap+0xeb/0xf0 ? __list_del_entry_valid_or_report+0x77/0xc0 ? do_error_trap+0x60/0x80 ? __list_del_entry_valid_or_report+0x77/0xc0 ? exc_invalid_op+0x49/0x60 ? __list_del_entry_valid_or_report+0x77/0xc0 ? asm_exc_invalid_op+0x16/0x20 ? table_deps+0x1b0/0x1b0 [dm_mod] ? __list_del_entry_valid_or_report+0x77/0xc0 origin_postsuspend+0x1a/0x50 [dm_snapshot] dm_table_postsuspend_targets+0x34/0x50 [dm_mod] dm_suspend+0xd8/0xf0 [dm_mod] dev_suspend+0x1f2/0x2f0 [dm_mod] ? table_deps+0x1b0/0x1b0 [dm_mod] ctl_ioctl+0x300/0x5f0 [dm_mod] dm_compat_ctl_ioctl+0x7/0x10 [dm_mod] __x64_compat_sys_ioctl+0x104/0x170 do_syscall_64+0x184/0x1b0 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0xf7e6aead ---[ end trace 0000000000000000 ]--- Fixes: ffcc39364160 ("dm: enhance internal suspend and resume interface") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0ec85d159bcde..29270f6f272f6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2897,6 +2897,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2905,12 +2908,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. + */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); -- GitLab From 50c0ad785a780c72a2fdaba10b38c645ffb4eae6 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 24 Oct 2023 08:07:38 +0000 Subject: [PATCH 0903/1418] drm/tegra: dsi: Add missing check for of_find_device_by_node [ Upstream commit afe6fcb9775882230cd29b529203eabd5d2a638d ] Add check for the return value of of_find_device_by_node() and return the error if it fails in order to avoid NULL pointer dereference. Fixes: e94236cde4d5 ("drm/tegra: dsi: Add ganged mode support") Signed-off-by: Chen Ni Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20231024080738.825553-1-nichen@iscas.ac.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index de1333dc0d867..0adce882f157b 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1534,9 +1534,11 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi) np = of_parse_phandle(dsi->dev->of_node, "nvidia,ganged-mode", 0); if (np) { struct platform_device *gangster = of_find_device_by_node(np); + of_node_put(np); + if (!gangster) + return -EPROBE_DEFER; dsi->slave = platform_get_drvdata(gangster); - of_node_put(np); if (!dsi->slave) { put_device(&gangster->dev); -- GitLab From f4a1a30f18373b05ec7fc5bd340a884b833f8d33 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Wed, 4 Oct 2023 22:10:55 +0800 Subject: [PATCH 0904/1418] drm/tegra: dpaux: Fix PM disable depth imbalance in tegra_dpaux_probe [ Upstream commit 0800880f4eb789b7d299db40f2e86e056bd33a4e ] The pm_runtime_enable function increases the power disable depth, which means that we must perform a matching decrement on the error handling path to maintain balance within the given context. Additionally, we need to address the same issue for pm_runtime_get_sync. We fix this by invoking pm_runtime_disable and pm_runtime_put_sync when error returns. Fixes: 82b81b3ec1a7 ("drm/tegra: dpaux: Implement runtime PM") Signed-off-by: Zhang Shurong Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/tencent_B13DB7F6C0023C46157250A524966F326A09@qq.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dpaux.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index d773ef4854188..b563988fb6848 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -524,7 +524,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev) if (err < 0) { dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n", dpaux->irq, err); - return err; + goto err_pm_disable; } disable_irq(dpaux->irq); @@ -544,7 +544,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev) */ err = tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_I2C); if (err < 0) - return err; + goto err_pm_disable; #ifdef CONFIG_GENERIC_PINCONF dpaux->desc.name = dev_name(&pdev->dev); @@ -557,7 +557,8 @@ static int tegra_dpaux_probe(struct platform_device *pdev) dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux); if (IS_ERR(dpaux->pinctrl)) { dev_err(&pdev->dev, "failed to register pincontrol\n"); - return PTR_ERR(dpaux->pinctrl); + err = PTR_ERR(dpaux->pinctrl); + goto err_pm_disable; } #endif /* enable and clear all interrupts */ @@ -573,10 +574,15 @@ static int tegra_dpaux_probe(struct platform_device *pdev) err = devm_of_dp_aux_populate_ep_devices(&dpaux->aux); if (err < 0) { dev_err(dpaux->dev, "failed to populate AUX bus: %d\n", err); - return err; + goto err_pm_disable; } return 0; + +err_pm_disable: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return err; } static int tegra_dpaux_remove(struct platform_device *pdev) -- GitLab From d3e8c2409a35b26bb4756819ace721ff7c8fd4a6 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Thu, 16 Sep 2021 18:56:40 +0800 Subject: [PATCH 0905/1418] drm/tegra: dsi: Make use of the helper function dev_err_probe() [ Upstream commit fc75e4fcbd1e4252a0481ebb23cd4516c127a8e2 ] When possible use dev_err_probe help to properly deal with the PROBE_DEFER error, the benefit is that DEFER issue will be logged in the devices_deferred debugfs file. And using dev_err_probe() can reduce code size, the error value gets printed. Signed-off-by: Cai Huoqing Signed-off-by: Thierry Reding Stable-dep-of: 830c1ded3563 ("drm/tegra: dsi: Fix some error handling paths in tegra_dsi_probe()") Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 0adce882f157b..6cbba2adb6e5a 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1591,28 +1591,24 @@ static int tegra_dsi_probe(struct platform_device *pdev) } dsi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(dsi->clk)) { - dev_err(&pdev->dev, "cannot get DSI clock\n"); - return PTR_ERR(dsi->clk); - } + if (IS_ERR(dsi->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), + "cannot get DSI clock\n"); dsi->clk_lp = devm_clk_get(&pdev->dev, "lp"); - if (IS_ERR(dsi->clk_lp)) { - dev_err(&pdev->dev, "cannot get low-power clock\n"); - return PTR_ERR(dsi->clk_lp); - } + if (IS_ERR(dsi->clk_lp)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), + "cannot get low-power clock\n"); dsi->clk_parent = devm_clk_get(&pdev->dev, "parent"); - if (IS_ERR(dsi->clk_parent)) { - dev_err(&pdev->dev, "cannot get parent clock\n"); - return PTR_ERR(dsi->clk_parent); - } + if (IS_ERR(dsi->clk_parent)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), + "cannot get parent clock\n"); dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi"); - if (IS_ERR(dsi->vdd)) { - dev_err(&pdev->dev, "cannot get VDD supply\n"); - return PTR_ERR(dsi->vdd); - } + if (IS_ERR(dsi->vdd)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), + "cannot get VDD supply\n"); err = tegra_dsi_setup_clocks(dsi); if (err < 0) { -- GitLab From a57bbd606a8a0b67cb4e7a95becba911393e01fb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:08 +0200 Subject: [PATCH 0906/1418] drm/tegra: dsi: Fix some error handling paths in tegra_dsi_probe() [ Upstream commit 830c1ded356369cd1303e8bb87ce3fea6e744de8 ] If an error occurs after calling tegra_output_probe(), tegra_output_remove() should be called as already done in the remove function. Fixes: dec727399a4b ("drm/tegra: Add DSI support") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/16820073278d031f6c474a08d5f22a255158585e.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 54 ++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 6cbba2adb6e5a..815e32e05f600 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1586,44 +1586,58 @@ static int tegra_dsi_probe(struct platform_device *pdev) if (!pdev->dev.pm_domain) { dsi->rst = devm_reset_control_get(&pdev->dev, "dsi"); - if (IS_ERR(dsi->rst)) - return PTR_ERR(dsi->rst); + if (IS_ERR(dsi->rst)) { + err = PTR_ERR(dsi->rst); + goto remove; + } } dsi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(dsi->clk)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), - "cannot get DSI clock\n"); + if (IS_ERR(dsi->clk)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), + "cannot get DSI clock\n"); + goto remove; + } dsi->clk_lp = devm_clk_get(&pdev->dev, "lp"); - if (IS_ERR(dsi->clk_lp)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), - "cannot get low-power clock\n"); + if (IS_ERR(dsi->clk_lp)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), + "cannot get low-power clock\n"); + goto remove; + } dsi->clk_parent = devm_clk_get(&pdev->dev, "parent"); - if (IS_ERR(dsi->clk_parent)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), - "cannot get parent clock\n"); + if (IS_ERR(dsi->clk_parent)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), + "cannot get parent clock\n"); + goto remove; + } dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi"); - if (IS_ERR(dsi->vdd)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), - "cannot get VDD supply\n"); + if (IS_ERR(dsi->vdd)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), + "cannot get VDD supply\n"); + goto remove; + } err = tegra_dsi_setup_clocks(dsi); if (err < 0) { dev_err(&pdev->dev, "cannot setup clocks\n"); - return err; + goto remove; } regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); dsi->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(dsi->regs)) - return PTR_ERR(dsi->regs); + if (IS_ERR(dsi->regs)) { + err = PTR_ERR(dsi->regs); + goto remove; + } dsi->mipi = tegra_mipi_request(&pdev->dev, pdev->dev.of_node); - if (IS_ERR(dsi->mipi)) - return PTR_ERR(dsi->mipi); + if (IS_ERR(dsi->mipi)) { + err = PTR_ERR(dsi->mipi); + goto remove; + } dsi->host.ops = &tegra_dsi_host_ops; dsi->host.dev = &pdev->dev; @@ -1654,6 +1668,8 @@ unregister: mipi_dsi_host_unregister(&dsi->host); mipi_free: tegra_mipi_free(dsi->mipi); +remove: + tegra_output_remove(&dsi->output); return err; } -- GitLab From 806dc32deac2810cca6785d8389f0e2dfdc19b55 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:09 +0200 Subject: [PATCH 0907/1418] drm/tegra: dsi: Fix missing pm_runtime_disable() in the error handling path of tegra_dsi_probe() [ Upstream commit 5286a9fc280c45b6b307ee1b07f7a997e042252c ] If an error occurs after calling pm_runtime_enable(), pm_runtime_disable() should be called as already done in the remove function. Fixes: ef8187d75265 ("drm/tegra: dsi: Implement runtime PM") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/ee4a15c9cd4b574a55cd67c30d2411239ba2cee9.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 815e32e05f600..7bb26655cb3cc 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1665,6 +1665,7 @@ static int tegra_dsi_probe(struct platform_device *pdev) return 0; unregister: + pm_runtime_disable(&pdev->dev); mipi_dsi_host_unregister(&dsi->host); mipi_free: tegra_mipi_free(dsi->mipi); -- GitLab From 8bc95d34408bf87b01d94edcc57a2ea5099f177d Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 10 Jul 2023 11:23:41 +0800 Subject: [PATCH 0908/1418] drm/tegra: hdmi: Convert to devm_platform_ioremap_resource() [ Upstream commit faae5646c13f4697fd2ba29b10e38f9be5aa890a ] Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230710032355.72914-5-frank.li@vivo.com Stable-dep-of: 643ae131b859 ("drm/tegra: hdmi: Fix some error handling paths in tegra_hdmi_probe()") Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/hdmi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index bf240767dad9f..f83d9041327f0 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1776,7 +1776,6 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) static int tegra_hdmi_probe(struct platform_device *pdev) { struct tegra_hdmi *hdmi; - struct resource *regs; int err; hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); @@ -1838,8 +1837,7 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) return err; - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hdmi->regs = devm_ioremap_resource(&pdev->dev, regs); + hdmi->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hdmi->regs)) return PTR_ERR(hdmi->regs); -- GitLab From bb459d437d179fc8cb1f3ab8745b6f5cbde6fc08 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:10 +0200 Subject: [PATCH 0909/1418] drm/tegra: hdmi: Fix some error handling paths in tegra_hdmi_probe() [ Upstream commit 643ae131b8598fb2940c92c7d23fe62823a119c8 ] If an error occurs after calling tegra_output_probe(), tegra_output_remove() should be called as already done in the remove function. Fixes: 59d29c0ec93f ("drm/tegra: Allocate resources at probe time") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/9b7c564eb71977678b20abd73ee52001a51cf327.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/hdmi.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index f83d9041327f0..c66764c0bd250 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1838,12 +1838,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) return err; hdmi->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(hdmi->regs)) - return PTR_ERR(hdmi->regs); + if (IS_ERR(hdmi->regs)) { + err = PTR_ERR(hdmi->regs); + goto remove; + } err = platform_get_irq(pdev, 0); if (err < 0) - return err; + goto remove; hdmi->irq = err; @@ -1852,18 +1854,18 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", hdmi->irq, err); - return err; + goto remove; } platform_set_drvdata(pdev, hdmi); err = devm_pm_runtime_enable(&pdev->dev); if (err) - return err; + goto remove; err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); if (err) - return err; + goto remove; INIT_LIST_HEAD(&hdmi->client.list); hdmi->client.ops = &hdmi_client_ops; @@ -1873,10 +1875,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", err); - return err; + goto remove; } return 0; + +remove: + tegra_output_remove(&hdmi->output); + return err; } static int tegra_hdmi_remove(struct platform_device *pdev) -- GitLab From 29f922ae4a0ac7b2cd5de58c48e5fc9a2d48cdfd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:11 +0200 Subject: [PATCH 0910/1418] drm/tegra: rgb: Fix some error handling paths in tegra_dc_rgb_probe() [ Upstream commit bc456b5d93dbfdbd89f2a036f4f3d8026595f9e4 ] If an error occurs after calling tegra_output_probe(), tegra_output_remove() should be called as already done in the remove function. Fixes: 59d29c0ec93f ("drm/tegra: Allocate resources at probe time") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/0001f61eb89048bc36241629b564195689cf54b6.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/rgb.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index ff8fce36d2aa1..5e95943021887 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -214,26 +214,28 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) rgb->clk = devm_clk_get(dc->dev, NULL); if (IS_ERR(rgb->clk)) { dev_err(dc->dev, "failed to get clock\n"); - return PTR_ERR(rgb->clk); + err = PTR_ERR(rgb->clk); + goto remove; } rgb->clk_parent = devm_clk_get(dc->dev, "parent"); if (IS_ERR(rgb->clk_parent)) { dev_err(dc->dev, "failed to get parent clock\n"); - return PTR_ERR(rgb->clk_parent); + err = PTR_ERR(rgb->clk_parent); + goto remove; } err = clk_set_parent(rgb->clk, rgb->clk_parent); if (err < 0) { dev_err(dc->dev, "failed to set parent clock: %d\n", err); - return err; + goto remove; } rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0"); if (IS_ERR(rgb->pll_d_out0)) { err = PTR_ERR(rgb->pll_d_out0); dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err); - return err; + goto remove; } if (dc->soc->has_pll_d2_out0) { @@ -241,13 +243,17 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) if (IS_ERR(rgb->pll_d2_out0)) { err = PTR_ERR(rgb->pll_d2_out0); dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); - return err; + goto remove; } } dc->rgb = &rgb->output; return 0; + +remove: + tegra_output_remove(&rgb->output); + return err; } int tegra_dc_rgb_remove(struct tegra_dc *dc) -- GitLab From f3f407ccbe84a34de9be3195d22cdd5969f3fd9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:12 +0200 Subject: [PATCH 0911/1418] drm/tegra: rgb: Fix missing clk_put() in the error handling paths of tegra_dc_rgb_probe() [ Upstream commit 45c8034db47842b25a3ab6139d71e13b4e67b9b3 ] If clk_get_sys(..., "pll_d2_out0") fails, the clk_get_sys() call must be undone. Add the missing clk_put and a new 'put_pll_d_out0' label in the error handling path, and use it. Fixes: 0c921b6d4ba0 ("drm/tegra: dc: rgb: Allow changing PLLD rate on Tegra30+") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/0182895ead4e4730426616b0d9995954c960b634.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/rgb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 5e95943021887..86e55e5d12b39 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -243,7 +243,7 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) if (IS_ERR(rgb->pll_d2_out0)) { err = PTR_ERR(rgb->pll_d2_out0); dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); - goto remove; + goto put_pll; } } @@ -251,6 +251,8 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) return 0; +put_pll: + clk_put(rgb->pll_d_out0); remove: tegra_output_remove(&rgb->output); return err; -- GitLab From c994fed05c2715e5bd6821a0a6c472aaeccf1f9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:13 +0200 Subject: [PATCH 0912/1418] drm/tegra: output: Fix missing i2c_put_adapter() in the error handling paths of tegra_output_probe() [ Upstream commit 2db4578ef6ffb2b52115ca0ebf897b60ec559556 ] If an error occurs after a successful of_get_i2c_adapter_by_node() call, it should be undone by a corresponding i2c_put_adapter(). Add the missing i2c_put_adapter() call. Fixes: 9be7d864cf07 ("drm/tegra: Implement panel support") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/b38604178991e1f08b2cda219103be266be2d680.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/output.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 47d26b5d99456..7ccd010a821b7 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -139,8 +139,10 @@ int tegra_output_probe(struct tegra_output *output) GPIOD_IN, "HDMI hotplug detect"); if (IS_ERR(output->hpd_gpio)) { - if (PTR_ERR(output->hpd_gpio) != -ENOENT) - return PTR_ERR(output->hpd_gpio); + if (PTR_ERR(output->hpd_gpio) != -ENOENT) { + err = PTR_ERR(output->hpd_gpio); + goto put_i2c; + } output->hpd_gpio = NULL; } @@ -149,7 +151,7 @@ int tegra_output_probe(struct tegra_output *output) err = gpiod_to_irq(output->hpd_gpio); if (err < 0) { dev_err(output->dev, "gpiod_to_irq(): %d\n", err); - return err; + goto put_i2c; } output->hpd_irq = err; @@ -162,7 +164,7 @@ int tegra_output_probe(struct tegra_output *output) if (err < 0) { dev_err(output->dev, "failed to request IRQ#%u: %d\n", output->hpd_irq, err); - return err; + goto put_i2c; } output->connector.polled = DRM_CONNECTOR_POLL_HPD; @@ -176,6 +178,12 @@ int tegra_output_probe(struct tegra_output *output) } return 0; + +put_i2c: + if (output->ddc) + i2c_put_adapter(output->ddc); + + return err; } void tegra_output_remove(struct tegra_output *output) -- GitLab From e6ed73bb8e4bbae4ad85b710bc99cb1aa9d65e87 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Fri, 22 Dec 2023 18:41:54 +0100 Subject: [PATCH 0913/1418] drm/rockchip: inno_hdmi: Fix video timing [ Upstream commit 47a145c03484d33e65d773169d5ca1b9fe2a492e ] The controller wants the difference between *total and *sync_start in the HDMI_VIDEO_EXT_*DELAY registers. Otherwise the signal is very unstable for certain non-VIC modes. See downstream commit [0]. [0] https://github.com/rockchip-linux/kernel/commit/8eb559f2502c Fixes: 412d4ae6b7a5 ("drm/rockchip: hdmi: add Innosilicon HDMI support") Co-developed-by: Zheng Yang Signed-off-by: Zheng Yang Signed-off-by: Alex Bee Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231222174220.55249-4-knaerzche@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/inno_hdmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index f51774866f412..8f230f4c01bc3 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -411,7 +411,7 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_L, value & 0xFF); hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_H, (value >> 8) & 0xFF); - value = mode->hsync_start - mode->hdisplay; + value = mode->htotal - mode->hsync_start; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_L, value & 0xFF); hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_H, (value >> 8) & 0xFF); @@ -426,7 +426,7 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, value = mode->vtotal - mode->vdisplay; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VBLANK, value & 0xFF); - value = mode->vsync_start - mode->vdisplay; + value = mode->vtotal - mode->vsync_start; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VDELAY, value & 0xFF); value = mode->vsync_end - mode->vsync_start; -- GitLab From 758629d348b981f77385abd1bca6a026fb972458 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Wed, 8 Nov 2023 11:36:20 -0500 Subject: [PATCH 0914/1418] drm: Don't treat 0 as -1 in drm_fixp2int_ceil [ Upstream commit cf8837d7204481026335461629b84ac7f4538fa5 ] Unit testing this in VKMS shows that passing 0 into this function returns -1, which is highly counter- intuitive. Fix it by checking whether the input is >= 0 instead of > 0. Fixes: 64566b5e767f ("drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil") Signed-off-by: Harry Wentland Reviewed-by: Simon Ser Reviewed-by: Melissa Wen Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/20231108163647.106853-2-harry.wentland@amd.com Signed-off-by: Sasha Levin --- include/drm/drm_fixed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 03cb890690e83..6230088428cdb 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -94,7 +94,7 @@ static inline int drm_fixp2int_round(s64 a) static inline int drm_fixp2int_ceil(s64 a) { - if (a > 0) + if (a >= 0) return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); else return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); -- GitLab From 40624af6674745e174c754a20d7c53c250e65e7a Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 4 Dec 2023 17:14:16 +0800 Subject: [PATCH 0915/1418] drm/vmwgfx: fix a memleak in vmw_gmrid_man_get_node [ Upstream commit 89709105a6091948ffb6ec2427954cbfe45358ce ] When ida_alloc_max fails, resources allocated before should be freed, including *res allocated by kmalloc and ttm_resource_init. Fixes: d3bcb4b02fe9 ("drm/vmwgfx: switch the TTM backends to self alloc") Signed-off-by: Zhipeng Lu Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20231204091416.3308430-1-alexious@zju.edu.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 60e3cc537f365..b9e5c8cd31001 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -65,8 +65,11 @@ static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man, ttm_resource_init(bo, place, *res); id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); - if (id < 0) + if (id < 0) { + ttm_resource_fini(man, *res); + kfree(*res); return id; + } spin_lock(&gman->lock); -- GitLab From fe790f340eb746202c6a6b274e39989b97d23d9e Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 20 Nov 2023 13:29:48 +0100 Subject: [PATCH 0916/1418] drm/rockchip: lvds: do not overwrite error code [ Upstream commit 79b09453c4e369ca81cfb670d0136d089e3b92f0 ] ret variable stores the return value of drm_of_find_panel_or_bridge which can return error codes different from EPROBE_DEFER. Therefore, let's just return that error code instead of forcing it to EPROBE_DEFER. Fixes: 34cc0aa25456 ("drm/rockchip: Add support for Rockchip Soc LVDS") Cc: Quentin Schulz Signed-off-by: Quentin Schulz Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231120-rk-lvds-defer-msg-v2-1-9c59a5779cf9@theobroma-systems.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/rockchip_lvds.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index 68f6ebb33460b..1fde888cdd827 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -578,7 +578,6 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, goto err_put_port; } else if (ret) { DRM_DEV_ERROR(dev, "failed to find panel and bridge node\n"); - ret = -EPROBE_DEFER; goto err_put_port; } if (lvds->panel) -- GitLab From 06e38277151e5301b671614d273b8dc2d0486e5f Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 20 Nov 2023 13:29:49 +0100 Subject: [PATCH 0917/1418] drm/rockchip: lvds: do not print scary message when probing defer [ Upstream commit 52d11c863ac92e36a0365249f7f6d27ac48c78bc ] This scary message can misled the user into thinking something bad has happened and needs to be fixed, however it could simply be part of a normal boot process where EPROBE_DEFER is taken into account. Therefore, let's use dev_err_probe so that this message doesn't get shown (by default) when the return code is EPROBE_DEFER. Fixes: 34cc0aa25456 ("drm/rockchip: Add support for Rockchip Soc LVDS") Cc: Quentin Schulz Signed-off-by: Quentin Schulz Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231120-rk-lvds-defer-msg-v2-2-9c59a5779cf9@theobroma-systems.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index 1fde888cdd827..eb4a108c5bd2a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -577,7 +577,7 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, ret = -EINVAL; goto err_put_port; } else if (ret) { - DRM_DEV_ERROR(dev, "failed to find panel and bridge node\n"); + dev_err_probe(dev, ret, "failed to find panel and bridge node\n"); goto err_put_port; } if (lvds->panel) -- GitLab From 680c94312e8665da6517e4f4b8e4cf8e0ded64c7 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 20 Dec 2023 14:13:11 -0800 Subject: [PATCH 0918/1418] drm/panel-edp: use put_sync in unprepare [ Upstream commit 49ddab089611ae5ddd0201ddbbf633da75bfcc25 ] Some edp panel requires T10 (Delay from end of valid video data transmitted by the Source device to power-off) less than 500ms. Using autosuspend with delay set as 1000 violates this requirement. Use put_sync_suspend in unprepare to meet the spec. For other cases (such as getting EDID), it still uses autosuspend. Suggested-by: Douglas Anderson Fixes: 3235b0f20a0a ("drm/panel: panel-simple: Use runtime pm to avoid excessive unprepare / prepare") Signed-off-by: Hsin-Yi Wang Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231220221418.2610185-1-hsinyi@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-edp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 42584d8a9aeb6..bfcddd4aa9322 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -413,8 +413,7 @@ static int panel_edp_unprepare(struct drm_panel *panel) if (!p->prepared) return 0; - pm_runtime_mark_last_busy(panel->dev); - ret = pm_runtime_put_autosuspend(panel->dev); + ret = pm_runtime_put_sync_suspend(panel->dev); if (ret < 0) return ret; p->prepared = false; -- GitLab From f6d51a91b41704704e395de6839c667b0f810bbf Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 15:13:28 +0800 Subject: [PATCH 0919/1418] drm/lima: fix a memleak in lima_heap_alloc [ Upstream commit 04ae3eb470e52a3c41babe85ff8cee195e4dcbea ] When lima_vm_map_bo fails, the resources need to be deallocated, or there will be memleaks. Fixes: 6aebc51d7aef ("drm/lima: support heap buffer creation") Signed-off-by: Zhipeng Lu Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20240117071328.3811480-1-alexious@zju.edu.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/lima/lima_gem.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 0f1ca0b0db495..d72c5bf4e5ac1 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -75,29 +75,34 @@ int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm) } else { bo->base.sgt = kmalloc(sizeof(*bo->base.sgt), GFP_KERNEL); if (!bo->base.sgt) { - sg_free_table(&sgt); - return -ENOMEM; + ret = -ENOMEM; + goto err_out0; } } ret = dma_map_sgtable(dev, &sgt, DMA_BIDIRECTIONAL, 0); - if (ret) { - sg_free_table(&sgt); - kfree(bo->base.sgt); - bo->base.sgt = NULL; - return ret; - } + if (ret) + goto err_out1; *bo->base.sgt = sgt; if (vm) { ret = lima_vm_map_bo(vm, bo, old_size >> PAGE_SHIFT); if (ret) - return ret; + goto err_out2; } bo->heap_size = new_size; return 0; + +err_out2: + dma_unmap_sgtable(dev, &sgt, DMA_BIDIRECTIONAL, 0); +err_out1: + kfree(bo->base.sgt); + bo->base.sgt = NULL; +err_out0: + sg_free_table(&sgt); + return ret; } int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file, -- GitLab From 1eb749a9c30585a5758e7a10510ad3b5ca29d6ed Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 19 Dec 2023 05:07:21 +0200 Subject: [PATCH 0920/1418] ASoC: amd: acp: Add missing error handling in sof-mach [ Upstream commit d0ada20279db2649a7549a2b8a4a3379c59f238d ] Handle potential acp_sofdsp_dai_links_create() errors in ACP SOF machine driver's probe function. Note there is no need for an undo. While at it, switch to dev_err_probe(). Fixes: 9f84940f5004 ("ASoC: amd: acp: Add SOF audio support on Chrome board") Signed-off-by: Cristian Ciocaltea Reviewed-by: Emil Velikov Link: https://msgid.link/r/20231219030728.2431640-4-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/acp/acp-sof-mach.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index f19f064a75272..972600d271586 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -114,16 +114,14 @@ static int acp_sof_probe(struct platform_device *pdev) card->num_controls = ARRAY_SIZE(acp_controls); card->drvdata = (struct acp_card_drvdata *)pdev->id_entry->driver_data; - acp_sofdsp_dai_links_create(card); + ret = acp_sofdsp_dai_links_create(card); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to create DAI links\n"); ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) { - dev_err(&pdev->dev, - "devm_snd_soc_register_card(%s) failed: %d\n", - card->name, ret); - return ret; - } - + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register card(%s)\n", card->name); return 0; } -- GitLab From b17195f1e01598045756038c7f4a0683a7084030 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 12 Jan 2024 09:32:56 +0000 Subject: [PATCH 0921/1418] dmaengine: tegra210-adma: Update dependency to ARCH_TEGRA [ Upstream commit 33b7db45533af240fe44e809f9dc4d604cf82d07 ] Update the architecture dependency to be the generic Tegra because the driver works on the four latest Tegra generations not just T210, if you build a kernel with a specific ARCH_TEGRA_xxx_SOC option that excludes 210 you don't get this driver. Fixes: 433de642a76c9 ("dmaengine: tegra210-adma: add support for Tegra186/Tegra194") Signed-off-by: Peter Robinson Cc: Jon Hunter Cc: Thierry Reding Cc: Sameer Pujar Cc: Laxman Dewangan Reviewed-by: Jon Hunter Link: https://lore.kernel.org/r/20240112093310.329642-2-pbrobinson@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 81de833ccd041..66ef0a1114845 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -665,16 +665,16 @@ config TEGRA20_APB_DMA config TEGRA210_ADMA tristate "NVIDIA Tegra210 ADMA support" - depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST) + depends on (ARCH_TEGRA || COMPILE_TEST) select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support for the NVIDIA Tegra210 ADMA controller driver. The - DMA controller has multiple DMA channels and is used to service - various audio clients in the Tegra210 audio processing engine - (APE). This DMA controller transfers data from memory to - peripheral and vice versa. It does not support memory to - memory data transfer. + Support for the NVIDIA Tegra210/Tegra186/Tegra194/Tegra234 ADMA + controller driver. The DMA controller has multiple DMA channels + and is used to service various audio clients in the Tegra210 + audio processing engine (APE). This DMA controller transfers + data from memory to peripheral and vice versa. It does not + support memory to memory data transfer. config TIMB_DMA tristate "Timberdale FPGA DMA support" -- GitLab From 8ba8db9786b55047df5ad3db3e01dd886687a77d Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:01:11 +0100 Subject: [PATCH 0922/1418] media: tc358743: register v4l2 async device only after successful setup [ Upstream commit 87399f1ff92203d65f1febf5919429f4bb613a02 ] Ensure the device has been setup correctly before registering the v4l2 async device, thus allowing userspace to access. Signed-off-by: Alexander Stein Reviewed-by: Robert Foss Fixes: 4c5211a10039 ("[media] tc358743: register v4l2 asynchronous subdevice") Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240110090111.458115-1-alexander.stein@ew.tq-group.com Signed-off-by: Sasha Levin --- drivers/media/i2c/tc358743.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index 200841c1f5cf0..68628ccecd161 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -2094,9 +2094,6 @@ static int tc358743_probe(struct i2c_client *client) state->mbus_fmt_code = MEDIA_BUS_FMT_RGB888_1X24; sd->dev = &client->dev; - err = v4l2_async_register_subdev(sd); - if (err < 0) - goto err_hdl; mutex_init(&state->confctl_mutex); @@ -2154,6 +2151,10 @@ static int tc358743_probe(struct i2c_client *client) if (err) goto err_work_queues; + err = v4l2_async_register_subdev(sd); + if (err < 0) + goto err_work_queues; + v4l2_info(sd, "%s found @ 0x%x (%s)\n", client->name, client->addr << 1, client->adapter->name); -- GitLab From 43f4364c8f4ebf98da66acf11b65c44289d02deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 18 Jan 2024 13:08:15 +0200 Subject: [PATCH 0923/1418] PCI/DPC: Print all TLP Prefixes, not just the first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6568d82512b0a64809acff3d7a747362fa4288c8 ] The TLP Prefix Log Register consists of multiple DWORDs (PCIe r6.1 sec 7.9.14.13) but the loop in dpc_process_rp_pio_error() keeps reading from the first DWORD, so we print only the first PIO TLP Prefix (duplicated several times), and we never print the second, third, etc., Prefixes. Add the iteration count based offset calculation into the config read. Fixes: f20c4ea49ec4 ("PCI/DPC: Add eDPC support") Link: https://lore.kernel.org/r/20240118110815.3867-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: add user-visible details to commit log] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index a5d7c69b764e0..08800282825e1 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -231,7 +231,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix); pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); } clear_status: -- GitLab From 845a478304f0faadecae77c821a1574efdfe92dd Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Fri, 19 Jan 2024 04:03:02 +0000 Subject: [PATCH 0924/1418] perf record: Fix possible incorrect free in record__switch_output() [ Upstream commit aff10a165201f6f60cff225083ce301ad3f5d8f1 ] perf_data__switch() may not assign a legal value to 'new_filename'. In this case, 'new_filename' uses the on-stack value, which may cause a incorrect free and unexpected result. Fixes: 03724b2e9c45 ("perf record: Allow to limit number of reported perf.data files") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20240119040304.3708522-2-yangjihong1@huawei.com Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7314183cdcb6c..b9b0fda8374e2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1785,8 +1785,8 @@ static int record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; + char *new_filename = NULL; int fd, err; - char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; -- GitLab From fa8810aa5ac3e21a45fd2b28d47e49683f29635f Mon Sep 17 00:00:00 2001 From: Mikhail Khvainitski Date: Sat, 23 Dec 2023 21:12:13 +0200 Subject: [PATCH 0925/1418] HID: lenovo: Add middleclick_workaround sysfs knob for cptkbd [ Upstream commit 2814646f76f8518326964f12ff20aaee70ba154d ] Previous attempt to autodetect well-behaving patched firmware introduced in commit 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") has shown that there are false-positives on original firmware (on both 1st gen and 2nd gen keyboards) which causes the middle button click workaround to be mistakenly disabled. This commit adds explicit parameter to sysfs to control this workaround. Fixes: 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") Fixes: 43527a0094c1 ("HID: lenovo: Restrict detection of patched firmware only to USB cptkbd") Signed-off-by: Mikhail Khvainitski Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-lenovo.c | 57 +++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 149a3c74346b4..f86c1ea83a037 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -54,10 +54,10 @@ struct lenovo_drvdata { /* 0: Up * 1: Down (undecided) * 2: Scrolling - * 3: Patched firmware, disable workaround */ u8 middlebutton_state; bool fn_lock; + bool middleclick_workaround_cptkbd; }; #define map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) @@ -621,6 +621,36 @@ static ssize_t attr_sensitivity_store_cptkbd(struct device *dev, return count; } +static ssize_t attr_middleclick_workaround_show_cptkbd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hid_device *hdev = to_hid_device(dev); + struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + + return snprintf(buf, PAGE_SIZE, "%u\n", + cptkbd_data->middleclick_workaround_cptkbd); +} + +static ssize_t attr_middleclick_workaround_store_cptkbd(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct hid_device *hdev = to_hid_device(dev); + struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + int value; + + if (kstrtoint(buf, 10, &value)) + return -EINVAL; + if (value < 0 || value > 1) + return -EINVAL; + + cptkbd_data->middleclick_workaround_cptkbd = !!value; + + return count; +} + static struct device_attribute dev_attr_fn_lock = __ATTR(fn_lock, S_IWUSR | S_IRUGO, @@ -632,10 +662,16 @@ static struct device_attribute dev_attr_sensitivity_cptkbd = attr_sensitivity_show_cptkbd, attr_sensitivity_store_cptkbd); +static struct device_attribute dev_attr_middleclick_workaround_cptkbd = + __ATTR(middleclick_workaround, S_IWUSR | S_IRUGO, + attr_middleclick_workaround_show_cptkbd, + attr_middleclick_workaround_store_cptkbd); + static struct attribute *lenovo_attributes_cptkbd[] = { &dev_attr_fn_lock.attr, &dev_attr_sensitivity_cptkbd.attr, + &dev_attr_middleclick_workaround_cptkbd.attr, NULL }; @@ -686,23 +722,7 @@ static int lenovo_event_cptkbd(struct hid_device *hdev, { struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); - if (cptkbd_data->middlebutton_state != 3) { - /* REL_X and REL_Y events during middle button pressed - * are only possible on patched, bug-free firmware - * so set middlebutton_state to 3 - * to never apply workaround anymore - */ - if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD && - cptkbd_data->middlebutton_state == 1 && - usage->type == EV_REL && - (usage->code == REL_X || usage->code == REL_Y)) { - cptkbd_data->middlebutton_state = 3; - /* send middle button press which was hold before */ - input_event(field->hidinput->input, - EV_KEY, BTN_MIDDLE, 1); - input_sync(field->hidinput->input); - } - + if (cptkbd_data->middleclick_workaround_cptkbd) { /* "wheel" scroll events */ if (usage->type == EV_REL && (usage->code == REL_WHEEL || usage->code == REL_HWHEEL)) { @@ -1166,6 +1186,7 @@ static int lenovo_probe_cptkbd(struct hid_device *hdev) cptkbd_data->middlebutton_state = 0; cptkbd_data->fn_lock = true; cptkbd_data->sensitivity = 0x05; + cptkbd_data->middleclick_workaround_cptkbd = true; lenovo_features_set_cptkbd(hdev); ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_cptkbd); -- GitLab From d346b3e5b25c95d504478507eb867cd3818775ab Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 23 Jan 2024 20:18:07 +0530 Subject: [PATCH 0926/1418] drm/amd/display: Fix a potential buffer overflow in 'dp_dsc_clock_en_read()' [ Upstream commit 4b09715f1504f1b6e8dff0e9643630610bc05141 ] Tell snprintf() to store at most 10 bytes in the output buffer instead of 30. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1508 dp_dsc_clock_en_read() error: snprintf() is printing too much 30 vs 10 Fixes: c06e09b76639 ("drm/amd/display: Add DSC parameters logging to debugfs") Cc: Alex Hung Cc: Qingqing Zhuo Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index ee242d9d8b060..ff7dd17ad0763 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1358,7 +1358,7 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, const uint32_t rd_buf_size = 10; struct pipe_ctx *pipe_ctx; ssize_t result = 0; - int i, r, str_len = 30; + int i, r, str_len = 10; rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL); -- GitLab From 29fde8895b2fcc33f44aea28c644ce2d9b62f9e0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Jan 2024 21:16:04 +0530 Subject: [PATCH 0927/1418] drm/amd/display: Fix potential NULL pointer dereferences in 'dcn10_set_output_transfer_func()' [ Upstream commit 9ccfe80d022df7c595f1925afb31de2232900656 ] The 'stream' pointer is used in dcn10_set_output_transfer_func() before the check if 'stream' is NULL. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn10/dcn10_hwseq.c:1892 dcn10_set_output_transfer_func() warn: variable dereferenced before check 'stream' (see line 1875) Fixes: ddef02de0d71 ("drm/amd/display: add null checks before logging") Cc: Wyatt Wood Cc: Anthony Koo Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Anthony Koo Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 009b5861a3fec..d6c5d48c878ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1854,6 +1854,9 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, { struct dpp *dpp = pipe_ctx->plane_res.dpp; + if (!stream) + return false; + if (dpp == NULL) return false; @@ -1876,8 +1879,8 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, } else dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_BYPASS); - if (stream != NULL && stream->ctx != NULL && - stream->out_transfer_func != NULL) { + if (stream->ctx && + stream->out_transfer_func) { log_tf(stream->ctx, stream->out_transfer_func, dpp->regamma_params.hw_points_num); -- GitLab From a19403d20ed86930fbcf14434ff87737691f28ef Mon Sep 17 00:00:00 2001 From: Linh Phung Date: Thu, 2 Feb 2023 01:03:09 +0000 Subject: [PATCH 0928/1418] pinctrl: renesas: r8a779g0: Add Audio SSI pins, groups, and functions [ Upstream commit b37d57e1daccbc1a0393d9207d5c48f9181fe85a ] Add pins, groups, and functions for the Serial Sound Interface (SSI) on the Renesas R-Car V4H (R8A779G0) SoC. Signed-off-by: Linh Phung Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/87bkmcang2.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: 68540257cdf1 ("pinctrl: renesas: r8a779g0: Add missing SCIF_CLK2 pin group/function") Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/pfc-r8a779g0.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/pinctrl/renesas/pfc-r8a779g0.c b/drivers/pinctrl/renesas/pfc-r8a779g0.c index 43a63a21a6fb5..14774163df354 100644 --- a/drivers/pinctrl/renesas/pfc-r8a779g0.c +++ b/drivers/pinctrl/renesas/pfc-r8a779g0.c @@ -2360,6 +2360,22 @@ static const unsigned int scif_clk_mux[] = { SCIF_CLK_MARK, }; +/* - SSI ------------------------------------------------- */ +static const unsigned int ssi_data_pins[] = { + /* SSI_SD */ + RCAR_GP_PIN(1, 20), +}; +static const unsigned int ssi_data_mux[] = { + SSI_SD_MARK, +}; +static const unsigned int ssi_ctrl_pins[] = { + /* SSI_SCK, SSI_WS */ + RCAR_GP_PIN(1, 18), RCAR_GP_PIN(1, 19), +}; +static const unsigned int ssi_ctrl_mux[] = { + SSI_SCK_MARK, SSI_WS_MARK, +}; + /* - TPU ------------------------------------------------------------------- */ static const unsigned int tpu_to0_pins[] = { /* TPU0TO0 */ @@ -2652,6 +2668,9 @@ static const struct sh_pfc_pin_group pinmux_groups[] = { SH_PFC_PIN_GROUP(scif4_ctrl), SH_PFC_PIN_GROUP(scif_clk), + SH_PFC_PIN_GROUP(ssi_data), + SH_PFC_PIN_GROUP(ssi_ctrl), + SH_PFC_PIN_GROUP(tpu_to0), /* suffix might be updated */ SH_PFC_PIN_GROUP(tpu_to0_a), /* suffix might be updated */ SH_PFC_PIN_GROUP(tpu_to1), /* suffix might be updated */ @@ -2964,6 +2983,11 @@ static const char * const scif_clk_groups[] = { "scif_clk", }; +static const char * const ssi_groups[] = { + "ssi_data", + "ssi_ctrl", +}; + static const char * const tpu_groups[] = { /* suffix might be updated */ "tpu_to0", @@ -3045,6 +3069,8 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scif4), SH_PFC_FUNCTION(scif_clk), + SH_PFC_FUNCTION(ssi), + SH_PFC_FUNCTION(tpu), SH_PFC_FUNCTION(tsn0), -- GitLab From 29eaa9246b0da536912125f32e47f69a62097d6a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 18 Jan 2024 17:32:36 +0100 Subject: [PATCH 0929/1418] pinctrl: renesas: r8a779g0: Add missing SCIF_CLK2 pin group/function [ Upstream commit 68540257cdf1d07ff8a649aa94c21c5804bbb9b0 ] R-Car V4H actually has two SCIF_CLK pins. The second pin provides the SCIF_CLK signal for HSCIF2 and SCIF4. Fixes: 050442ae4c74f830 ("pinctrl: renesas: r8a779g0: Add pins, groups and functions") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/6352ec9b63fdd38c2c70d8d203e46f21fbfeccdc.1705589612.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/pfc-r8a779g0.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/pinctrl/renesas/pfc-r8a779g0.c b/drivers/pinctrl/renesas/pfc-r8a779g0.c index 14774163df354..acf7664ea835b 100644 --- a/drivers/pinctrl/renesas/pfc-r8a779g0.c +++ b/drivers/pinctrl/renesas/pfc-r8a779g0.c @@ -2360,6 +2360,14 @@ static const unsigned int scif_clk_mux[] = { SCIF_CLK_MARK, }; +static const unsigned int scif_clk2_pins[] = { + /* SCIF_CLK2 */ + RCAR_GP_PIN(8, 11), +}; +static const unsigned int scif_clk2_mux[] = { + SCIF_CLK2_MARK, +}; + /* - SSI ------------------------------------------------- */ static const unsigned int ssi_data_pins[] = { /* SSI_SD */ @@ -2667,6 +2675,7 @@ static const struct sh_pfc_pin_group pinmux_groups[] = { SH_PFC_PIN_GROUP(scif4_clk), SH_PFC_PIN_GROUP(scif4_ctrl), SH_PFC_PIN_GROUP(scif_clk), + SH_PFC_PIN_GROUP(scif_clk2), SH_PFC_PIN_GROUP(ssi_data), SH_PFC_PIN_GROUP(ssi_ctrl), @@ -2983,6 +2992,10 @@ static const char * const scif_clk_groups[] = { "scif_clk", }; +static const char * const scif_clk2_groups[] = { + "scif_clk2", +}; + static const char * const ssi_groups[] = { "ssi_data", "ssi_ctrl", @@ -3068,6 +3081,7 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scif3), SH_PFC_FUNCTION(scif4), SH_PFC_FUNCTION(scif_clk), + SH_PFC_FUNCTION(scif_clk2), SH_PFC_FUNCTION(ssi), -- GitLab From 9310d3de984cf2a51eb966bb0bbd8fd9711c16e4 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Wed, 24 Jan 2024 19:38:56 -0600 Subject: [PATCH 0930/1418] clk: samsung: exynos850: Propagate SPI IPCLK rate change [ Upstream commit 67c15187d4910ee353374676d4dddf09d8cb227e ] When SPI transfer is being prepared, the spi-s3c64xx driver will call clk_set_rate() to change the rate of SPI source clock (IPCLK). But IPCLK is a gate (leaf) clock, so it must propagate the rate change up the clock tree, so that corresponding DIV clocks can actually change their divider values. Add CLK_SET_RATE_PARENT flag to corresponding clocks for all SPI instances in Exynos850 (spi_0, spi_1 and spi_2) to make it possible. This change involves next clocks: usi_spi_0: Clock Block Div range -------------------------------------------- gout_spi0_ipclk CMU_PERI - dout_peri_spi0 CMU_PERI /1..32 mout_peri_spi_user CMU_PERI - dout_peri_ip CMU_TOP /1..16 usi_cmgp0: Clock Block Div range -------------------------------------------- gout_cmgp_usi0_ipclk CMU_CMGP - dout_cmgp_usi0 CMU_CMGP /1..32 mout_cmgp_usi0 CMU_CMGP - gout_clkcmu_cmgp_bus CMU_APM - dout_apm_bus CMU_APM /1..8 usi_cmgp1: Clock Block Div range -------------------------------------------- gout_cmgp_usi1_ipclk CMU_CMGP - dout_cmgp_usi1 CMU_CMGP /1..32 mout_cmgp_usi1 CMU_CMGP - gout_clkcmu_cmgp_bus CMU_APM - dout_apm_bus CMU_APM /1..8 With input clock of 400 MHz, this scheme provides next IPCLK rate range, for each SPI block: SPI0: 781 kHz ... 400 MHz SPI1/2: 1.6 MHz ... 400 MHz Accounting for internal /4 divider in SPI blocks, and because the max SPI frequency is limited at 50 MHz, it gives us next SPI SCK rates: SPI0: 200 kHz ... 49.9 MHz SPI1/2: 400 kHz ... 49.9 MHz Which should cover all possible applications of SPI bus. Of course, setting SPI frequency to values as low as 500 kHz will also affect the common bus dividers (dout_apm_bus or dout_peri_ip), which in turn effectively lowers the rates for all leaf bus clocks derived from those dividers, like HSI2C and I3C clocks. But at least it gives the board designer a choice, whether to keep all clocks (SPI/HSI2C/I3C) at high frequencies, or make all those clocks have lower frequencies. Not propagating the rate change to those common dividers would limit this choice to "only high frequencies are allowed for SPI/HSI2C/I3C" option, making the common dividers useless. This decision follows the "Worse is better" approach, relying on the users/engineers to know the system internals when working with such low-level features, instead of trying to account for all possible use-cases. Fixes: 7dd05578198b ("clk: samsung: Introduce Exynos850 clock driver") Signed-off-by: Sam Protsenko Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240125013858.3986-2-semen.protsenko@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos850.c | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos850.c b/drivers/clk/samsung/clk-exynos850.c index 541761e96aeb6..87e463ad42741 100644 --- a/drivers/clk/samsung/clk-exynos850.c +++ b/drivers/clk/samsung/clk-exynos850.c @@ -572,7 +572,7 @@ static const struct samsung_div_clock apm_div_clks[] __initconst = { static const struct samsung_gate_clock apm_gate_clks[] __initconst = { GATE(CLK_GOUT_CLKCMU_CMGP_BUS, "gout_clkcmu_cmgp_bus", "dout_apm_bus", - CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, 0, 0), + CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CLKCMU_CHUB_BUS, "gout_clkcmu_chub_bus", "mout_clkcmu_chub_bus", CLK_CON_GAT_GATE_CLKCMU_CHUB_BUS, 21, 0, 0), @@ -936,19 +936,19 @@ static const struct samsung_fixed_rate_clock cmgp_fixed_clks[] __initconst = { static const struct samsung_mux_clock cmgp_mux_clks[] __initconst = { MUX(CLK_MOUT_CMGP_ADC, "mout_cmgp_adc", mout_cmgp_adc_p, CLK_CON_MUX_CLK_CMGP_ADC, 0, 1), - MUX(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p, - CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1), - MUX(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p, - CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1), + MUX_F(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p, + CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1, CLK_SET_RATE_PARENT, 0), + MUX_F(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p, + CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_div_clock cmgp_div_clks[] __initconst = { DIV(CLK_DOUT_CMGP_ADC, "dout_cmgp_adc", "gout_clkcmu_cmgp_bus", CLK_CON_DIV_DIV_CLK_CMGP_ADC, 0, 4), - DIV(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0", - CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5), - DIV(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1", - CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5), + DIV_F(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0", + CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5, CLK_SET_RATE_PARENT, 0), + DIV_F(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1", + CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = { @@ -963,12 +963,12 @@ static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = { "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_GPIO_PCLK, 21, CLK_IGNORE_UNUSED, 0), GATE(CLK_GOUT_CMGP_USI0_IPCLK, "gout_cmgp_usi0_ipclk", "dout_cmgp_usi0", - CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CMGP_USI0_PCLK, "gout_cmgp_usi0_pclk", "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_PCLK, 21, 0, 0), GATE(CLK_GOUT_CMGP_USI1_IPCLK, "gout_cmgp_usi1_ipclk", "dout_cmgp_usi1", - CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CMGP_USI1_PCLK, "gout_cmgp_usi1_pclk", "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_PCLK, 21, 0, 0), @@ -1409,8 +1409,9 @@ static const struct samsung_mux_clock peri_mux_clks[] __initconst = { mout_peri_uart_user_p, PLL_CON0_MUX_CLKCMU_PERI_UART_USER, 4, 1), MUX(CLK_MOUT_PERI_HSI2C_USER, "mout_peri_hsi2c_user", mout_peri_hsi2c_user_p, PLL_CON0_MUX_CLKCMU_PERI_HSI2C_USER, 4, 1), - MUX(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", mout_peri_spi_user_p, - PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1), + MUX_F(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", + mout_peri_spi_user_p, PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1, + CLK_SET_RATE_PARENT, 0), }; static const struct samsung_div_clock peri_div_clks[] __initconst = { @@ -1420,8 +1421,8 @@ static const struct samsung_div_clock peri_div_clks[] __initconst = { CLK_CON_DIV_DIV_CLK_PERI_HSI2C_1, 0, 5), DIV(CLK_DOUT_PERI_HSI2C2, "dout_peri_hsi2c2", "gout_peri_hsi2c2", CLK_CON_DIV_DIV_CLK_PERI_HSI2C_2, 0, 5), - DIV(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user", - CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5), + DIV_F(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user", + CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_gate_clock peri_gate_clks[] __initconst = { @@ -1463,7 +1464,7 @@ static const struct samsung_gate_clock peri_gate_clks[] __initconst = { "mout_peri_bus_user", CLK_CON_GAT_GOUT_PERI_PWM_MOTOR_PCLK, 21, 0, 0), GATE(CLK_GOUT_SPI0_IPCLK, "gout_spi0_ipclk", "dout_peri_spi0", - CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_SPI0_PCLK, "gout_spi0_pclk", "mout_peri_bus_user", CLK_CON_GAT_GOUT_PERI_SPI_0_PCLK, 21, 0, 0), GATE(CLK_GOUT_SYSREG_PERI_PCLK, "gout_sysreg_peri_pclk", -- GitLab From 83fe9c3f445d7b94dc4ff61f454618a5bdd7bf38 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 27 Jan 2024 02:57:56 +0000 Subject: [PATCH 0931/1418] perf evsel: Fix duplicate initialization of data->id in evsel__parse_sample() [ Upstream commit 4962aec0d684c8edb14574ccd0da53e4926ff834 ] data->id has been initialized at line 2362, remove duplicate initialization. Fixes: 3ad31d8a0df2 ("perf evsel: Centralize perf_sample initialization") Signed-off-by: Yang Jihong Reviewed-by: Arnaldo Carvalho de Melo Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240127025756.4041808-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin --- tools/perf/util/evsel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde35078..7db35dbdfcefe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2375,7 +2375,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->period = evsel->core.attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; data->misc = event->header.misc; - data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; data->vcpu = -1; -- GitLab From 7ae1b0dc12ec407f12f80b49d22c6ad2308e2202 Mon Sep 17 00:00:00 2001 From: Igor Prusov Date: Fri, 2 Feb 2024 17:25:48 +0300 Subject: [PATCH 0932/1418] clk: meson: Add missing clocks to axg_clk_regmaps [ Upstream commit ba535bce57e71463a86f8b33a0ea88c26e3a6418 ] Some clocks were missing from axg_clk_regmaps, which caused kernel panic during cat /sys/kernel/debug/clk/clk_summary [ 57.349402] Unable to handle kernel NULL pointer dereference at virtual address 00000000000001fc ... [ 57.430002] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 57.436900] pc : regmap_read+0x1c/0x88 [ 57.440608] lr : clk_regmap_gate_is_enabled+0x3c/0xb0 [ 57.445611] sp : ffff800082f1b690 [ 57.448888] x29: ffff800082f1b690 x28: 0000000000000000 x27: ffff800080eb9a70 [ 57.455961] x26: 0000000000000007 x25: 0000000000000016 x24: 0000000000000000 [ 57.463033] x23: ffff800080e8b488 x22: 0000000000000015 x21: ffff00000e7e7000 [ 57.470106] x20: ffff00000400ec00 x19: 0000000000000000 x18: ffffffffffffffff [ 57.477178] x17: 0000000000000000 x16: 0000000000000000 x15: ffff0000042a3000 [ 57.484251] x14: 0000000000000000 x13: ffff0000042a2fec x12: 0000000005f5e100 [ 57.491323] x11: abcc77118461cefd x10: 0000000000000020 x9 : ffff8000805e4b24 [ 57.498396] x8 : ffff0000028063c0 x7 : ffff800082f1b710 x6 : ffff800082f1b710 [ 57.505468] x5 : 00000000ffffffd0 x4 : ffff800082f1b6e0 x3 : 0000000000001000 [ 57.512541] x2 : ffff800082f1b6e4 x1 : 000000000000012c x0 : 0000000000000000 [ 57.519615] Call trace: [ 57.522030] regmap_read+0x1c/0x88 [ 57.525393] clk_regmap_gate_is_enabled+0x3c/0xb0 [ 57.530050] clk_core_is_enabled+0x44/0x120 [ 57.534190] clk_summary_show_subtree+0x154/0x2f0 [ 57.538847] clk_summary_show_subtree+0x220/0x2f0 [ 57.543505] clk_summary_show_subtree+0x220/0x2f0 [ 57.548162] clk_summary_show_subtree+0x220/0x2f0 [ 57.552820] clk_summary_show_subtree+0x220/0x2f0 [ 57.557477] clk_summary_show_subtree+0x220/0x2f0 [ 57.562135] clk_summary_show_subtree+0x220/0x2f0 [ 57.566792] clk_summary_show_subtree+0x220/0x2f0 [ 57.571450] clk_summary_show+0x84/0xb8 [ 57.575245] seq_read_iter+0x1bc/0x4b8 [ 57.578954] seq_read+0x8c/0xd0 [ 57.582059] full_proxy_read+0x68/0xc8 [ 57.585767] vfs_read+0xb0/0x268 [ 57.588959] ksys_read+0x70/0x108 [ 57.592236] __arm64_sys_read+0x24/0x38 [ 57.596031] invoke_syscall+0x50/0x128 [ 57.599740] el0_svc_common.constprop.0+0x48/0xf8 [ 57.604397] do_el0_svc+0x28/0x40 [ 57.607675] el0_svc+0x34/0xb8 [ 57.610694] el0t_64_sync_handler+0x13c/0x158 [ 57.615006] el0t_64_sync+0x190/0x198 [ 57.618635] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (b941fc00) [ 57.624668] ---[ end trace 0000000000000000 ]--- [jbrunet: add missing Fixes tag] Signed-off-by: Igor Prusov Link: https://lore.kernel.org/r/20240202172537.1.I64656c75d84284bc91e6126b50b33c502be7c42a@changeid Fixes: 14ebb3154b8f ("clk: meson: axg: add Video Clocks") Signed-off-by: Jerome Brunet Signed-off-by: Sasha Levin --- drivers/clk/meson/axg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index 2ad3801398dc1..7802dabb26f6d 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -2144,7 +2144,9 @@ static struct clk_regmap *const axg_clk_regmaps[] = { &axg_vclk_input, &axg_vclk2_input, &axg_vclk_div, + &axg_vclk_div1, &axg_vclk2_div, + &axg_vclk2_div1, &axg_vclk_div2_en, &axg_vclk_div4_en, &axg_vclk_div6_en, -- GitLab From 3828fc3ab84cea74dd41a143456b0238ba2b56c5 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 12 Jan 2024 05:42:26 -0800 Subject: [PATCH 0933/1418] media: em28xx: annotate unchecked call to media_device_register() [ Upstream commit fd61d77a3d28444b2635f0c8b5a2ecd6a4d94026 ] Static analyzers generate alerts for an unchecked call to `media_device_register()`. However, in this case, the device will work reliably without the media controller API. Add a comment above the call to prevent future unnecessary changes. Suggested-by: Mauro Carvalho Chehab Fixes: 37ecc7b1278f ("[media] em28xx: add media controller support") Signed-off-by: Nikita Zhandarovich Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/em28xx/em28xx-cards.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 4d037c92af7c5..bae76023cf71d 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -4094,6 +4094,10 @@ static int em28xx_usb_probe(struct usb_interface *intf, * topology will likely change after the load of the em28xx subdrivers. */ #ifdef CONFIG_MEDIA_CONTROLLER + /* + * No need to check the return value, the device will still be + * usable without media controller API. + */ retval = media_device_register(dev->media_dev); #endif -- GitLab From 6bf5c2fade8ed53b2d26fa9875e5b04f36c7145d Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:47:53 +0800 Subject: [PATCH 0934/1418] media: v4l2-tpg: fix some memleaks in tpg_alloc [ Upstream commit 8cf9c5051076e0eb958f4361d50d8b0c3ee6691c ] In tpg_alloc, resources should be deallocated in each and every error-handling paths, since they are allocated in for statements. Otherwise there would be memleaks because tpg_free is called only when tpg_alloc return 0. Fixes: 63881df94d3e ("[media] vivid: add the Test Pattern Generator") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 52 +++++++++++++++---- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index 303d02b1d71c9..fe30f5b0050dd 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -113,6 +113,7 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) { unsigned pat; unsigned plane; + int ret = 0; tpg->max_line_width = max_w; for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) { @@ -121,14 +122,18 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) tpg->lines[pat][plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->lines[pat][plane]) - return -ENOMEM; + if (!tpg->lines[pat][plane]) { + ret = -ENOMEM; + goto free_lines; + } if (plane == 0) continue; tpg->downsampled_lines[pat][plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->downsampled_lines[pat][plane]) - return -ENOMEM; + if (!tpg->downsampled_lines[pat][plane]) { + ret = -ENOMEM; + goto free_lines; + } } } for (plane = 0; plane < TPG_MAX_PLANES; plane++) { @@ -136,18 +141,45 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) tpg->contrast_line[plane] = vzalloc(array_size(pixelsz, max_w)); - if (!tpg->contrast_line[plane]) - return -ENOMEM; + if (!tpg->contrast_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } tpg->black_line[plane] = vzalloc(array_size(pixelsz, max_w)); - if (!tpg->black_line[plane]) - return -ENOMEM; + if (!tpg->black_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } tpg->random_line[plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->random_line[plane]) - return -ENOMEM; + if (!tpg->random_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } } return 0; + +free_contrast_line: + for (plane = 0; plane < TPG_MAX_PLANES; plane++) { + vfree(tpg->contrast_line[plane]); + vfree(tpg->black_line[plane]); + vfree(tpg->random_line[plane]); + tpg->contrast_line[plane] = NULL; + tpg->black_line[plane] = NULL; + tpg->random_line[plane] = NULL; + } +free_lines: + for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) + for (plane = 0; plane < TPG_MAX_PLANES; plane++) { + vfree(tpg->lines[pat][plane]); + tpg->lines[pat][plane] = NULL; + if (plane == 0) + continue; + vfree(tpg->downsampled_lines[pat][plane]); + tpg->downsampled_lines[pat][plane] = NULL; + } + return ret; } EXPORT_SYMBOL_GPL(tpg_alloc); -- GitLab From 0c9550b032de48d6a7fa6a4ddc09699d64d9300d Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:48:44 +0800 Subject: [PATCH 0935/1418] media: v4l2-mem2mem: fix a memleak in v4l2_m2m_register_entity [ Upstream commit 8f94b49a5b5d386c038e355bef6347298aabd211 ] The entity->name (i.e. name) is allocated in v4l2_m2m_register_entity but isn't freed in its following error-handling paths. This patch adds such deallocation to prevent memleak of entity->name. Fixes: be2fff656322 ("media: add helpers for memory-to-memory media controller") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/v4l2-core/v4l2-mem2mem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index be7fde1ed3eaa..97645d6509e1c 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -1084,11 +1084,17 @@ static int v4l2_m2m_register_entity(struct media_device *mdev, entity->function = function; ret = media_entity_pads_init(entity, num_pads, pads); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } ret = media_device_register_entity(mdev, entity); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } return 0; } -- GitLab From 437a111f79a2f5b2a5f21e27fdec6f40c8768712 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Sat, 3 Feb 2024 14:40:43 +0100 Subject: [PATCH 0936/1418] media: edia: dvbdev: fix a use-after-free [ Upstream commit 8c64f4cdf4e6cc5682c52523713af8c39c94e6d5 ] In dvb_register_device, *pdvbdev is set equal to dvbdev, which is freed in several error-handling paths. However, *pdvbdev is not set to NULL after dvbdev's deallocation, causing use-after-frees in many places, for example, in the following call chain: budget_register |-> dvb_dmxdev_init |-> dvb_register_device |-> dvb_dmxdev_release |-> dvb_unregister_device |-> dvb_remove_device |-> dvb_device_put |-> kref_put When calling dvb_unregister_device, dmxdev->dvbdev (i.e. *pdvbdev in dvb_register_device) could point to memory that had been freed in dvb_register_device. Thereafter, this pointer is transferred to kref_put and triggering a use-after-free. Link: https://lore.kernel.org/linux-media/20240203134046.3120099-1-alexious@zju.edu.cn Fixes: b61901024776 ("V4L/DVB (5244): Dvbdev: fix illegal re-usage of fileoperations struct") Signed-off-by: Zhipeng Lu Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-core/dvbdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index d352e028491aa..aefee2277254d 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -494,6 +494,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); if (!dvbdevfops) { kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } @@ -502,6 +503,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, if (!new_node) { kfree(dvbdevfops); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } @@ -535,6 +537,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); return -EINVAL; @@ -557,6 +560,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return ret; } @@ -575,6 +579,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } -- GitLab From f69b926799b3d0a01376f77c86f3697462c913a2 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 Jan 2024 15:19:07 +0800 Subject: [PATCH 0937/1418] pinctrl: mediatek: Drop bogus slew rate register range for MT8186 [ Upstream commit 3a29c87548809405bcbc66acc69cbe6f15184d94 ] The MT8186 does not support configuring pin slew rate. This is evident from both the datasheet, and the fact that the driver points the slew rate register range at the GPIO direction register range. Drop the bogus setting. Fixes: 8b483bda1e46 ("pinctrl: add pinctrl driver on mt8186") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240131071910.3950450-1-wenst@chromium.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-mt8186.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8186.c b/drivers/pinctrl/mediatek/pinctrl-mt8186.c index a02f7c3269707..09edcf47effec 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8186.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8186.c @@ -1198,7 +1198,6 @@ static const struct mtk_pin_reg_calc mt8186_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8186_pin_dir_range), [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8186_pin_di_range), [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8186_pin_do_range), - [PINCTRL_PIN_REG_SR] = MTK_RANGE(mt8186_pin_dir_range), [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8186_pin_smt_range), [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8186_pin_ies_range), [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8186_pin_pu_range), -- GitLab From 99cd54167d0b3c75e3957b45ecc1a584548a219e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 Jan 2024 15:19:08 +0800 Subject: [PATCH 0938/1418] pinctrl: mediatek: Drop bogus slew rate register range for MT8192 [ Upstream commit e15ab05a6b3ed42f2f43f8bd1a1abdbde64afecd ] The MT8192 does not support configuring pin slew rate. This is evident from both the datasheet, and the fact that the driver points the slew rate register range at the GPIO direction register range. Drop the bogus setting. Fixes: d32f38f2a8fc ("pinctrl: mediatek: Add pinctrl driver for mt8192") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240131071910.3950450-2-wenst@chromium.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-mt8192.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8192.c b/drivers/pinctrl/mediatek/pinctrl-mt8192.c index 9695f4ec6aba9..f120268c00f56 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8192.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8192.c @@ -1379,7 +1379,6 @@ static const struct mtk_pin_reg_calc mt8192_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8192_pin_dir_range), [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8192_pin_di_range), [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8192_pin_do_range), - [PINCTRL_PIN_REG_SR] = MTK_RANGE(mt8192_pin_dir_range), [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8192_pin_smt_range), [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8192_pin_ies_range), [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8192_pin_pu_range), -- GitLab From 1f3e9910422eaeb7b829807ad73143d58b557e0e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 6 Feb 2024 19:43:35 +0100 Subject: [PATCH 0939/1418] clk: qcom: reset: Commonize the de/assert functions [ Upstream commit eda40d9c583e95e0b6ac69d2950eec10f802e0e8 ] They do the same thing, except the last argument of the last function call differs. Commonize them. Reviewed-by: Bryan O'Donoghue Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240105-topic-venus_reset-v2-2-c37eba13b5ce@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 2f8cf2c3f3e3 ("clk: qcom: reset: Ensure write completion on reset de/assertion") Signed-off-by: Sasha Levin --- drivers/clk/qcom/reset.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index e45e32804d2c7..20d1d35aaf229 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -22,8 +22,8 @@ static int qcom_reset(struct reset_controller_dev *rcdev, unsigned long id) return 0; } -static int -qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) +static int qcom_reset_set_assert(struct reset_controller_dev *rcdev, + unsigned long id, bool assert) { struct qcom_reset_controller *rst; const struct qcom_reset_map *map; @@ -33,21 +33,17 @@ qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) map = &rst->reset_map[id]; mask = map->bitmask ? map->bitmask : BIT(map->bit); - return regmap_update_bits(rst->regmap, map->reg, mask, mask); + return regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); } -static int -qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) +static int qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) { - struct qcom_reset_controller *rst; - const struct qcom_reset_map *map; - u32 mask; - - rst = to_qcom_reset_controller(rcdev); - map = &rst->reset_map[id]; - mask = map->bitmask ? map->bitmask : BIT(map->bit); + return qcom_reset_set_assert(rcdev, id, true); +} - return regmap_update_bits(rst->regmap, map->reg, mask, 0); +static int qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) +{ + return qcom_reset_set_assert(rcdev, id, false); } const struct reset_control_ops qcom_reset_ops = { -- GitLab From 6814dc592d35a9484c3ac83c0a3b7ac07ed0f99b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 6 Feb 2024 19:43:36 +0100 Subject: [PATCH 0940/1418] clk: qcom: reset: Ensure write completion on reset de/assertion [ Upstream commit 2f8cf2c3f3e3f7ef61bd19abb4b0bb797ad50aaf ] Trying to toggle the resets in a rapid fashion can lead to the changes not actually arriving at the clock controller block when we expect them to. This was observed at least on SM8250. Read back the value after regmap_update_bits to ensure write completion. Fixes: b36ba30c8ac6 ("clk: qcom: Add reset controller support") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240105-topic-venus_reset-v2-3-c37eba13b5ce@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/reset.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index 20d1d35aaf229..d96c96a9089f4 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -33,7 +33,12 @@ static int qcom_reset_set_assert(struct reset_controller_dev *rcdev, map = &rst->reset_map[id]; mask = map->bitmask ? map->bitmask : BIT(map->bit); - return regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); + regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); + + /* Read back the register to ensure write completion, ignore the value */ + regmap_read(rst->regmap, map->reg, &mask); + + return 0; } static int qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) -- GitLab From fd14781b30215672c09826f666ed65339885a358 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 30 Jun 2023 19:08:22 +0800 Subject: [PATCH 0941/1418] quota: simplify drop_dquot_ref() [ Upstream commit 7bce48f0fec602b3b6c335963b26d9eefa417788 ] As Honza said, remove_inode_dquot_ref() currently does not release the last dquot reference but instead adds the dquot to tofree_head list. This is because dqput() can sleep while dropping of the last dquot reference (writing back the dquot and calling ->release_dquot()) and that must not happen under dq_list_lock. Now that dqput() queues the final dquot cleanup into a workqueue, remove_inode_dquot_ref() can call dqput() unconditionally and we can significantly simplify it. Here we open code the simplified code of remove_inode_dquot_ref() into remove_dquot_ref() and remove the function put_dquot_list() which is no longer used. Signed-off-by: Baokun Li Signed-off-by: Jan Kara Message-Id: <20230630110822.3881712-6-libaokun1@huawei.com> Stable-dep-of: 179b8c97ebf6 ("quota: Fix rcu annotations of inode dquot pointers") Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 70 +++++++----------------------------------------- 1 file changed, 9 insertions(+), 61 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index b0cf3869d3bf5..730d8ffc4928a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1084,59 +1084,7 @@ out: return err; } -/* - * Remove references to dquots from inode and add dquot to list for freeing - * if we have the last reference to dquot - */ -static void remove_inode_dquot_ref(struct inode *inode, int type, - struct list_head *tofree_head) -{ - struct dquot **dquots = i_dquot(inode); - struct dquot *dquot = dquots[type]; - - if (!dquot) - return; - - dquots[type] = NULL; - if (list_empty(&dquot->dq_free)) { - /* - * The inode still has reference to dquot so it can't be in the - * free list - */ - spin_lock(&dq_list_lock); - list_add(&dquot->dq_free, tofree_head); - spin_unlock(&dq_list_lock); - } else { - /* - * Dquot is already in a list to put so we won't drop the last - * reference here. - */ - dqput(dquot); - } -} - -/* - * Free list of dquots - * Dquots are removed from inodes and no new references can be got so we are - * the only ones holding reference - */ -static void put_dquot_list(struct list_head *tofree_head) -{ - struct list_head *act_head; - struct dquot *dquot; - - act_head = tofree_head->next; - while (act_head != tofree_head) { - dquot = list_entry(act_head, struct dquot, dq_free); - act_head = act_head->next; - /* Remove dquot from the list so we won't have problems... */ - list_del_init(&dquot->dq_free); - dqput(dquot); - } -} - -static void remove_dquot_ref(struct super_block *sb, int type, - struct list_head *tofree_head) +static void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; #ifdef CONFIG_QUOTA_DEBUG @@ -1153,11 +1101,16 @@ static void remove_dquot_ref(struct super_block *sb, int type, */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { + struct dquot **dquots = i_dquot(inode); + struct dquot *dquot = dquots[type]; + #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif - remove_inode_dquot_ref(inode, type, tofree_head); + dquots[type] = NULL; + if (dquot) + dqput(dquot); } spin_unlock(&dq_data_lock); } @@ -1174,13 +1127,8 @@ static void remove_dquot_ref(struct super_block *sb, int type, /* Gather all references from inodes and drop them */ static void drop_dquot_ref(struct super_block *sb, int type) { - LIST_HEAD(tofree_head); - - if (sb->dq_op) { - remove_dquot_ref(sb, type, &tofree_head); - synchronize_srcu(&dquot_srcu); - put_dquot_list(&tofree_head); - } + if (sb->dq_op) + remove_dquot_ref(sb, type); } static inline -- GitLab From 7f9e833fc0f9b47be503af012eb5903086939754 Mon Sep 17 00:00:00 2001 From: Wang Jianjian Date: Fri, 2 Feb 2024 16:18:52 +0800 Subject: [PATCH 0942/1418] quota: Fix potential NULL pointer dereference [ Upstream commit d0aa72604fbd80c8aabb46eda00535ed35570f1f ] Below race may cause NULL pointer dereference P1 P2 dquot_free_inode quota_off drop_dquot_ref remove_dquot_ref dquots = i_dquot(inode) dquots = i_dquot(inode) srcu_read_lock dquots[cnt]) != NULL (1) dquots[type] = NULL (2) spin_lock(&dquots[cnt]->dq_dqb_lock) (3) .... If dquot_free_inode(or other routines) checks inode's quota pointers (1) before quota_off sets it to NULL(2) and use it (3) after that, NULL pointer dereference will be triggered. So let's fix it by using a temporary pointer to avoid this issue. Signed-off-by: Wang Jianjian Signed-off-by: Jan Kara Message-Id: <20240202081852.2514092-1-wangjianjian3@huawei.com> Stable-dep-of: 179b8c97ebf6 ("quota: Fix rcu annotations of inode dquot pointers") Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 98 ++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 730d8ffc4928a..44c4da364c994 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -399,15 +399,17 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ -static inline int mark_all_dquot_dirty(struct dquot * const *dquot) +static inline int mark_all_dquot_dirty(struct dquot * const *dquots) { int ret, err, cnt; + struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) /* Even in case of error we have to continue */ - ret = mark_dquot_dirty(dquot[cnt]); + ret = mark_dquot_dirty(dquot); if (!err) err = ret; } @@ -1684,6 +1686,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; struct dquot **dquots; + struct dquot *dquot; if (!inode_quota_active(inode)) { if (reserve) { @@ -1703,27 +1706,26 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; if (reserve) { - ret = dquot_add_space(dquots[cnt], 0, number, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { - ret = dquot_add_space(dquots[cnt], number, 0, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); if (reserve) - dquot_free_reserved_space(dquots[cnt], - number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; @@ -1754,6 +1756,7 @@ int dquot_alloc_inode(struct inode *inode) int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots; + struct dquot *dquot; if (!inode_quota_active(inode)) return 0; @@ -1764,17 +1767,19 @@ int dquot_alloc_inode(struct inode *inode) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]); + ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; /* Back out changes we already did */ - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } @@ -1796,6 +1801,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot **dquots; + struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { @@ -1811,9 +1817,8 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; @@ -1838,6 +1843,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot **dquots; + struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { @@ -1853,9 +1859,8 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; @@ -1882,6 +1887,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots; + struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { @@ -1902,17 +1908,18 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_bdq_free(dquots[cnt], number); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); + prepare_warning(&warn[cnt], dquot, wtype); if (reserve) - dquot_free_reserved_space(dquots[cnt], number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; @@ -1937,6 +1944,7 @@ void dquot_free_inode(struct inode *inode) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots; + struct dquot *dquot; int index; if (!inode_quota_active(inode)) @@ -1947,16 +1955,16 @@ void dquot_free_inode(struct inode *inode) spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; - warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_idq_free(dquots[cnt], 1); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + prepare_warning(&warn[cnt], dquot, wtype); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); @@ -1983,7 +1991,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot *transfer_from[MAXQUOTAS] = {}; - int cnt, ret = 0; + int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; @@ -2072,8 +2080,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); + /* + * These arrays are local and we hold dquot references so we don't need + * the srcu protection but still take dquot_srcu to avoid warning in + * mark_all_dquot_dirty(). + */ + index = srcu_read_lock(&dquot_srcu); mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); + srcu_read_unlock(&dquot_srcu, index); + flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); -- GitLab From c12efda47df9ef2680c1fd6dbbad5c022521c31b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Feb 2024 15:32:09 +0100 Subject: [PATCH 0943/1418] quota: Fix rcu annotations of inode dquot pointers [ Upstream commit 179b8c97ebf63429589f5afeba59a181fe70603e ] Dquot pointers in i_dquot array in the inode are protected by dquot_srcu. Annotate the array pointers with __rcu, perform the locked dereferences with srcu_dereference_check() instead of plain reads, and set the array elements with rcu_assign_pointer(). Fixes: b9ba6f94b238 ("quota: remove dqptr_sem") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402061900.rTuYDlo6-lkp@intel.com/ Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 66 ++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 44c4da364c994..b67557647d61f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -399,7 +399,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ -static inline int mark_all_dquot_dirty(struct dquot * const *dquots) +static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots) { int ret, err, cnt; struct dquot *dquot; @@ -1006,14 +1006,15 @@ out: } EXPORT_SYMBOL(dqget); -static inline struct dquot **i_dquot(struct inode *inode) +static inline struct dquot __rcu **i_dquot(struct inode *inode) { - return inode->i_sb->s_op->get_dquots(inode); + /* Force __rcu for now until filesystems are fixed */ + return (struct dquot __rcu **)inode->i_sb->s_op->get_dquots(inode); } static int dqinit_needed(struct inode *inode, int type) { - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1103,14 +1104,16 @@ static void remove_dquot_ref(struct super_block *sb, int type) */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { - struct dquot **dquots = i_dquot(inode); - struct dquot *dquot = dquots[type]; + struct dquot __rcu **dquots = i_dquot(inode); + struct dquot *dquot = srcu_dereference_check( + dquots[type], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif - dquots[type] = NULL; + rcu_assign_pointer(dquots[type], NULL); if (dquot) dqput(dquot); } @@ -1463,7 +1466,8 @@ static int inode_quota_active(const struct inode *inode) static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; - struct dquot **dquots, *got[MAXQUOTAS] = {}; + struct dquot __rcu **dquots; + struct dquot *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; @@ -1538,7 +1542,7 @@ static int __dquot_initialize(struct inode *inode, int type) if (!got[cnt]) continue; if (!dquots[cnt]) { - dquots[cnt] = got[cnt]; + rcu_assign_pointer(dquots[cnt], got[cnt]); got[cnt] = NULL; /* * Make quota reservation system happy if someone @@ -1546,12 +1550,16 @@ static int __dquot_initialize(struct inode *inode, int type) */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) { + struct dquot *dquot = srcu_dereference_check( + dquots[cnt], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + spin_lock(&inode->i_lock); /* Get reservation again under proper lock */ rsv = __inode_get_rsv_space(inode); - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquots[cnt]->dq_dqb.dqb_rsvspace += rsv; - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot->dq_dqb.dqb_rsvspace += rsv; + spin_unlock(&dquot->dq_dqb_lock); spin_unlock(&inode->i_lock); } } @@ -1573,7 +1581,7 @@ EXPORT_SYMBOL(dquot_initialize); bool dquot_initialize_needed(struct inode *inode) { - struct dquot **dquots; + struct dquot __rcu **dquots; int i; if (!inode_quota_active(inode)) @@ -1598,13 +1606,14 @@ EXPORT_SYMBOL(dquot_initialize_needed); static void __dquot_drop(struct inode *inode) { int cnt; - struct dquot **dquots = i_dquot(inode); + struct dquot __rcu **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - put[cnt] = dquots[cnt]; - dquots[cnt] = NULL; + put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + rcu_assign_pointer(dquots[cnt], NULL); } spin_unlock(&dq_data_lock); dqput_all(put); @@ -1612,7 +1621,7 @@ static void __dquot_drop(struct inode *inode) void dquot_drop(struct inode *inode) { - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1685,7 +1694,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; if (!inode_quota_active(inode)) { @@ -1755,7 +1764,7 @@ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; struct dquot *dquot; if (!inode_quota_active(inode)) @@ -1800,7 +1809,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; @@ -1842,7 +1851,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; @@ -1886,7 +1895,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; @@ -1943,7 +1952,7 @@ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; struct dquot *dquot; int index; @@ -1990,6 +1999,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; + struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; @@ -2022,6 +2032,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); + dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. @@ -2036,7 +2047,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; - transfer_from[cnt] = i_dquot(inode)[cnt]; + transfer_from[cnt] = srcu_dereference_check(dquots[cnt], + &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) @@ -2075,7 +2087,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } - i_dquot(inode)[cnt] = transfer_to[cnt]; + rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); @@ -2086,8 +2098,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) * mark_all_dquot_dirty(). */ index = srcu_read_lock(&dquot_srcu); - mark_all_dquot_dirty(transfer_from); - mark_all_dquot_dirty(transfer_to); + mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); + mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn_to); -- GitLab From 4421c746023bd368e17558eb9512ccfa6e82712c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Dec 2023 15:30:01 +0100 Subject: [PATCH 0944/1418] PCI: switchtec: Fix an error handling path in switchtec_pci_probe() [ Upstream commit dec529b0b0572b32f9eb91c882dd1f08ca657efb ] The commit in Fixes changed the logic on how resources are released and introduced a new switchtec_exit_pci() that need to be called explicitly in order to undo a corresponding switchtec_init_pci(). This was done in the remove function, but not in the probe. Fix the probe now. Fixes: df25461119d9 ("PCI: switchtec: Fix stdev_release() crash after surprise hot remove") Link: https://lore.kernel.org/r/01446d2ccb91a578239915812f2b7dfbeb2882af.1703428183.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 3f3320d0a4f8f..d05a482639e3c 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1674,7 +1674,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev, rc = switchtec_init_isr(stdev); if (rc) { dev_err(&stdev->dev, "failed to init isr.\n"); - goto err_put; + goto err_exit_pci; } iowrite32(SWITCHTEC_EVENT_CLEAR | @@ -1695,6 +1695,8 @@ static int switchtec_pci_probe(struct pci_dev *pdev, err_devadd: stdev_kill(stdev); +err_exit_pci: + switchtec_exit_pci(stdev); err_put: ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt)); put_device(&stdev->dev); -- GitLab From a71f66bd5f7b9b35a8aaa49e29565eca66299399 Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Sun, 28 Jan 2024 12:29:06 +0800 Subject: [PATCH 0945/1418] crypto: xilinx - call finalize with bh disabled [ Upstream commit a853450bf4c752e664abab0b2fad395b7ad7701c ] When calling crypto_finalize_request, BH should be disabled to avoid triggering the following calltrace: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 74 at crypto/crypto_engine.c:58 crypto_finalize_request+0xa0/0x118 Modules linked in: cryptodev(O) CPU: 2 PID: 74 Comm: firmware:zynqmp Tainted: G O 6.8.0-rc1-yocto-standard #323 Hardware name: ZynqMP ZCU102 Rev1.0 (DT) pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : crypto_finalize_request+0xa0/0x118 lr : crypto_finalize_request+0x104/0x118 sp : ffffffc085353ce0 x29: ffffffc085353ce0 x28: 0000000000000000 x27: ffffff8808ea8688 x26: ffffffc081715038 x25: 0000000000000000 x24: ffffff880100db00 x23: ffffff880100da80 x22: 0000000000000000 x21: 0000000000000000 x20: ffffff8805b14000 x19: ffffff880100da80 x18: 0000000000010450 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000003 x13: 0000000000000000 x12: ffffff880100dad0 x11: 0000000000000000 x10: ffffffc0832dcd08 x9 : ffffffc0812416d8 x8 : 00000000000001f4 x7 : ffffffc0830d2830 x6 : 0000000000000001 x5 : ffffffc082091000 x4 : ffffffc082091658 x3 : 0000000000000000 x2 : ffffffc7f9653000 x1 : 0000000000000000 x0 : ffffff8802d20000 Call trace: crypto_finalize_request+0xa0/0x118 crypto_finalize_aead_request+0x18/0x30 zynqmp_handle_aes_req+0xcc/0x388 crypto_pump_work+0x168/0x2d8 kthread_worker_fn+0xfc/0x3a0 kthread+0x118/0x138 ret_from_fork+0x10/0x20 irq event stamp: 40 hardirqs last enabled at (39): [] _raw_spin_unlock_irqrestore+0x70/0xb0 hardirqs last disabled at (40): [] el1_dbg+0x28/0x90 softirqs last enabled at (36): [] kernel_neon_begin+0x8c/0xf0 softirqs last disabled at (34): [] kernel_neon_begin+0x60/0xf0 ---[ end trace 0000000000000000 ]--- Fixes: 4d96f7d48131 ("crypto: xilinx - Add Xilinx AES driver") Signed-off-by: Quanyang Wang Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/xilinx/zynqmp-aes-gcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c index bf1f421e05f25..74bd3eb63734d 100644 --- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine, err = zynqmp_aes_aead_cipher(areq); } + local_bh_disable(); crypto_finalize_aead_request(engine, areq, err); + local_bh_enable(); + return 0; } -- GitLab From dc84f8c1a3dbea83a5be0bb9f25724b903286eef Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 6 Feb 2024 08:32:28 +0000 Subject: [PATCH 0946/1418] perf thread_map: Free strlist on normal path in thread_map__new_by_tid_str() [ Upstream commit 1eb3d924e3c0b8c27388b0583a989d757866efb6 ] slist needs to be freed in both error path and normal path in thread_map__new_by_tid_str(). Fixes: b52956c961be3a04 ("perf tools: Allow multiple threads or processes in record, stat, top") Reviewed-by: Arnaldo Carvalho de Melo Signed-off-by: Yang Jihong Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240206083228.172607-6-yangjihong1@huawei.com Signed-off-by: Sasha Levin --- tools/perf/util/thread_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943b..cee7fc3b5bb0c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -279,13 +279,13 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str) threads->nr = ntasks; } out: + strlist__delete(slist); if (threads) refcount_set(&threads->refcnt, 1); return threads; out_free_threads: zfree(&threads); - strlist__delete(slist); goto out; } -- GitLab From e4892870a5f55d1a06aa1c3d87e58875f4945fdb Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 31 Jan 2024 16:47:36 -0800 Subject: [PATCH 0947/1418] drm/msm/dpu: fix the programming of INTF_CFG2_DATA_HCTL_EN [ Upstream commit 2f4a67a3894e15c135125cb54edc5b43abc1b70e ] Currently INTF_CFG2_DATA_HCTL_EN is coupled with the enablement of widebus but this is incorrect because we should be enabling this bit independent of widebus except for cases where compression is enabled in one pixel per clock mode. Fix this by making the condition checks more explicit and enabling INTF_CFG2_DATA_HCTL_EN for all other cases when supported by DPU. Fixes: 3309a7563971 ("drm/msm/dpu: revise timing engine programming to support widebus feature") Suggested-by: Dmitry Baryshkov Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/576722/ Link: https://lore.kernel.org/r/20240201004737.2478-1-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 7 +++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 7 +++++++ .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c | 15 +++++++++------ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 25245ef386db6..4bdde5cb23aae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -228,6 +228,13 @@ bool dpu_encoder_is_widebus_enabled(const struct drm_encoder *drm_enc) return dpu_enc->wide_bus_en; } +bool dpu_encoder_is_dsc_enabled(const struct drm_encoder *drm_enc) +{ + const struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); + + return dpu_enc->dsc ? true : false; +} + int dpu_encoder_get_crc_values_cnt(const struct drm_encoder *drm_enc) { struct dpu_encoder_virt *dpu_enc; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h index 9e7236ef34e6d..a71efa2b9e508 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h @@ -175,6 +175,13 @@ int dpu_encoder_get_vsync_count(struct drm_encoder *drm_enc); bool dpu_encoder_is_widebus_enabled(const struct drm_encoder *drm_enc); +/** + * dpu_encoder_is_dsc_enabled - indicate whether dsc is enabled + * for the encoder. + * @drm_enc: Pointer to previously created drm encoder structure + */ +bool dpu_encoder_is_dsc_enabled(const struct drm_encoder *drm_enc); + /** * dpu_encoder_get_crc_values_cnt - get number of physical encoders contained * in virtual encoder that can collect CRC values diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 2c14646661b77..09aeec00bf5e2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -100,6 +100,7 @@ static void drm_mode_to_intf_timing_params( } timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); /* * for DP, divide the horizonal parameters by 2 when diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index 384558d2f9602..1debac4fcc3eb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -154,13 +154,8 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, hsync_ctl = (hsync_period << 16) | p->hsync_pulse_width; display_hctl = (hsync_end_x << 16) | hsync_start_x; - /* - * DATA_HCTL_EN controls data timing which can be different from - * video timing. It is recommended to enable it for all cases, except - * if compression is enabled in 1 pixel per clock mode - */ if (p->wide_bus_en) - intf_cfg2 |= INTF_CFG2_DATABUS_WIDEN | INTF_CFG2_DATA_HCTL_EN; + intf_cfg2 |= INTF_CFG2_DATABUS_WIDEN; data_width = p->width; @@ -230,6 +225,14 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, DPU_REG_WRITE(c, INTF_CONFIG, intf_cfg); DPU_REG_WRITE(c, INTF_PANEL_FORMAT, panel_format); if (ctx->cap->features & BIT(DPU_DATA_HCTL_EN)) { + /* + * DATA_HCTL_EN controls data timing which can be different from + * video timing. It is recommended to enable it for all cases, except + * if compression is enabled in 1 pixel per clock mode + */ + if (!(p->compression_en && !p->wide_bus_en)) + intf_cfg2 |= INTF_CFG2_DATA_HCTL_EN; + DPU_REG_WRITE(c, INTF_CONFIG2, intf_cfg2); DPU_REG_WRITE(c, INTF_DISPLAY_DATA_HCTL, display_data_hctl); DPU_REG_WRITE(c, INTF_ACTIVE_DATA_HCTL, active_data_hctl); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index e75339b96a1d2..7f502c8bee1d4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -33,6 +33,7 @@ struct intf_timing_params { u32 hsync_skew; bool wide_bus_en; + bool compression_en; }; struct intf_prog_fetch { -- GitLab From c264af81ad60aa4129ad2158c0d2a6c0a252fdd4 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 4 Feb 2024 18:45:27 +0100 Subject: [PATCH 0948/1418] drm/msm/dpu: Only enable DSC_MODE_MULTIPLEX if dsc_merge is enabled [ Upstream commit 06267d22f9ee6fd34150b6dcdb2fa6983e1a85bc ] When the topology calls for two interfaces on the current fixed topology of 2 DSC blocks, or uses 1 DSC block for a single interface (e.g. SC7280 with only one DSC block), there should be no merging of DSC output. This is already represented by the return value of dpu_encoder_use_dsc_merge(), but not yet used to correctly configure this flag. Fixes: 58dca9810749 ("drm/msm/disp/dpu1: Add support for DSC in encoder") Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/577067/ Link: https://lore.kernel.org/r/20240204-dpu-dsc-multiplex-v1-1-080963233c52@somainline.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 4bdde5cb23aae..3632f0768aa9e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1871,7 +1871,9 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc, dsc_common_mode = 0; pic_width = dsc->pic_width; - dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL; + dsc_common_mode = DSC_MODE_SPLIT_PANEL; + if (dpu_encoder_use_dsc_merge(enc_master->parent)) + dsc_common_mode |= DSC_MODE_MULTIPLEX; if (enc_master->intf_mode == INTF_MODE_VIDEO) dsc_common_mode |= DSC_MODE_VIDEO; -- GitLab From 7d82efd8d3a955470a4f35376f51506dc22bd270 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 6 Feb 2024 08:48:14 -0800 Subject: [PATCH 0949/1418] drm/radeon/ni: Fix wrong firmware size logging in ni_init_microcode() [ Upstream commit c4891d979c7668b195a0a75787967ec95a24ecef ] Clean up a typo in pr_err() erroneously printing NI MC 'rdev->mc_fw->size' during SMC firmware load. Log 'rdev->smc_fw->size' instead. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 6596afd48af4 ("drm/radeon/kms: add dpm support for btc (v3)") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/ni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 927e5f42e97d0..3e48cbb522a1c 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -813,7 +813,7 @@ int ni_init_microcode(struct radeon_device *rdev) err = 0; } else if (rdev->smc_fw->size != smc_req_size) { pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", - rdev->mc_fw->size, fw_name); + rdev->smc_fw->size, fw_name); err = -EINVAL; } } -- GitLab From ca9b82755a51832e249ea7abda34ea8f3933db56 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Nov 2022 16:11:33 +0100 Subject: [PATCH 0950/1418] clk: renesas: r8a779g0: Add CMT clocks [ Upstream commit 523ed9442b997c39220ee364b07a8773623e3a58 ] Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20221104151135.4706-2-wsa+renesas@sang-engineering.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: abb3fa662b8f ("clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks") Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index d5b325e3c5398..f89cda70f2cbb 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -169,6 +169,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("i2c4", 522, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c5", 523, R8A779G0_CLK_S0D6_PER), DEF_MOD("wdt1:wdt0", 907, R8A779G0_CLK_R), + DEF_MOD("cmt0", 910, R8A779G0_CLK_R), + DEF_MOD("cmt1", 911, R8A779G0_CLK_R), + DEF_MOD("cmt2", 912, R8A779G0_CLK_R), + DEF_MOD("cmt3", 913, R8A779G0_CLK_R), DEF_MOD("pfc0", 915, R8A779G0_CLK_CL16M), DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), -- GitLab From bf10ef659b21510f3a2b02289a441c1a533ea3b8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 2 Feb 2023 01:03:24 +0000 Subject: [PATCH 0951/1418] clk: renesas: r8a779g0: Add Audio clocks [ Upstream commit 8dffb520ace48bcb996db049540c78261730213c ] Add module clocks for the Audio (SSI/SSIU) blocks on the Renesas R-Car V4H (R8A779G0) SoC. Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/878rhganfo.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: abb3fa662b8f ("clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks") Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index f89cda70f2cbb..d0e8deacdd0be 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -177,6 +177,8 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), + DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), }; /* -- GitLab From 4f6274bafde6180d7ec83e06041f039531454f58 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 9 Feb 2023 17:03:00 +0100 Subject: [PATCH 0952/1418] clk: renesas: r8a779g0: Add thermal clock [ Upstream commit 7502a04dae0e614bc14553e31461e50499bc67aa ] Add the module clock used by the Thermal Sensor/Chip Internal Voltage Monitor/Core Voltage Monitor (THS/CIVM/CVM) on the Renesas R-Car V4H (R8A779G0) SoC. Based on a large patch in the BSP by Kazuya Mizuguchi. Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/59461effd0d9f7a39e0c91352c87f2b7071b1891.1675958536.git.geert+renesas@glider.be Stable-dep-of: abb3fa662b8f ("clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks") Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index d0e8deacdd0be..aace98c0c4735 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -177,6 +177,7 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("tsc", 919, R8A779G0_CLK_CL16M), DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), }; -- GitLab From 128c04cc04726a168797af94f399bfb6c9ed285d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Jan 2024 16:43:26 +0100 Subject: [PATCH 0953/1418] clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit abb3fa662b8f8eaed1590b0e7a4e19eda467cdd3 ] According to the R-Car V4H Series Hardware User’s Manual Rev.1.00, the parent clock of the Pin Function (PFC/GPIO) module clocks is the CP clock. Fix this by adding the missing CP clock, and correcting the PFC parents. Fixes: f2afa78d5a0c0b0b ("dt-bindings: clock: Add r8a779g0 CPG Core Clock Definitions") Fixes: 36ff366033f0dde1 ("clk: renesas: r8a779g0: Add PFC/GPIO clocks") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/5401fccd204dc90b44f0013e7f53b9eff8df8214.1706197297.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 11 ++++++----- include/dt-bindings/clock/r8a779g0-cpg-mssr.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index aace98c0c4735..e4c616921e5ea 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -22,7 +22,7 @@ enum clk_ids { /* Core Clock Outputs exported to DT */ - LAST_DT_CORE_CLK = R8A779G0_CLK_R, + LAST_DT_CORE_CLK = R8A779G0_CLK_CP, /* External Input Clocks */ CLK_EXTAL, @@ -139,6 +139,7 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED("svd2_vip", R8A779G0_CLK_SVD2_VIP, CLK_SV_VIP, 2, 1), DEF_FIXED("cbfusa", R8A779G0_CLK_CBFUSA, CLK_EXTAL, 2, 1), DEF_FIXED("cpex", R8A779G0_CLK_CPEX, CLK_EXTAL, 2, 1), + DEF_FIXED("cp", R8A779G0_CLK_CP, CLK_EXTAL, 2, 1), DEF_FIXED("viobus", R8A779G0_CLK_VIOBUS, CLK_VIO, 1, 1), DEF_FIXED("viobusd2", R8A779G0_CLK_VIOBUSD2, CLK_VIO, 2, 1), DEF_FIXED("vcbus", R8A779G0_CLK_VCBUS, CLK_VC, 1, 1), @@ -173,10 +174,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("cmt1", 911, R8A779G0_CLK_R), DEF_MOD("cmt2", 912, R8A779G0_CLK_R), DEF_MOD("cmt3", 913, R8A779G0_CLK_R), - DEF_MOD("pfc0", 915, R8A779G0_CLK_CL16M), - DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), - DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), - DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("pfc0", 915, R8A779G0_CLK_CP), + DEF_MOD("pfc1", 916, R8A779G0_CLK_CP), + DEF_MOD("pfc2", 917, R8A779G0_CLK_CP), + DEF_MOD("pfc3", 918, R8A779G0_CLK_CP), DEF_MOD("tsc", 919, R8A779G0_CLK_CL16M), DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), diff --git a/include/dt-bindings/clock/r8a779g0-cpg-mssr.h b/include/dt-bindings/clock/r8a779g0-cpg-mssr.h index 754c54a6eb06a..7850cdc62e285 100644 --- a/include/dt-bindings/clock/r8a779g0-cpg-mssr.h +++ b/include/dt-bindings/clock/r8a779g0-cpg-mssr.h @@ -86,5 +86,6 @@ #define R8A779G0_CLK_CPEX 74 #define R8A779G0_CLK_CBFUSA 75 #define R8A779G0_CLK_R 76 +#define R8A779G0_CLK_CP 77 #endif /* __DT_BINDINGS_CLOCK_R8A779G0_CPG_MSSR_H__ */ -- GitLab From 5342ad8db53c93d07ff0f4ec9120b0809f4f0442 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Jan 2024 16:45:13 +0100 Subject: [PATCH 0954/1418] clk: renesas: r8a779f0: Correct PFC/GPIO parent clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1b32a83a02d9433dbd8c5f4d6fc44aa597755bd ] According to the R-Car S4 Series Hardware User’s Manual Rev.0.81, the parent clock of the Pin Function (PFC/GPIO) module clock is the CP clock. As this clock is not documented to exist on R-Car S4, use the CPEX clock instead. Fixes: 73421f2a48e6bd1d ("clk: renesas: r8a779f0: Add PFC clock") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/f88ec4aede0eaf0107c8bb7b28ba719ac6cd418f.1706197415.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779f0-cpg-mssr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/r8a779f0-cpg-mssr.c b/drivers/clk/renesas/r8a779f0-cpg-mssr.c index 27b668def357f..7a49b91c93710 100644 --- a/drivers/clk/renesas/r8a779f0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779f0-cpg-mssr.c @@ -159,7 +159,7 @@ static const struct mssr_mod_clk r8a779f0_mod_clks[] __initconst = { DEF_MOD("cmt1", 911, R8A779F0_CLK_R), DEF_MOD("cmt2", 912, R8A779F0_CLK_R), DEF_MOD("cmt3", 913, R8A779F0_CLK_R), - DEF_MOD("pfc0", 915, R8A779F0_CLK_CL16M), + DEF_MOD("pfc0", 915, R8A779F0_CLK_CPEX), DEF_MOD("tsc", 919, R8A779F0_CLK_CL16M), DEF_MOD("ufs", 1514, R8A779F0_CLK_S0D4_HSC), }; -- GitLab From b14524556fe33f7580fd870c89cb23e4939381a0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Feb 2024 14:53:43 +0100 Subject: [PATCH 0955/1418] ALSA: seq: fix function cast warnings [ Upstream commit d7bf73809849463f76de42aad62c850305dd6c5d ] clang-16 points out a control flow integrity (kcfi) issue when event callbacks get converted to incompatible types: sound/core/seq/seq_midi.c:135:30: error: cast from 'int (*)(struct snd_rawmidi_substream *, const char *, int)' to 'snd_seq_dump_func_t' (aka 'int (*)(void *, void *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 135 | snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)dump_midi, substream); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sound/core/seq/seq_virmidi.c:83:31: error: cast from 'int (*)(struct snd_rawmidi_substream *, const unsigned char *, int)' to 'snd_seq_dump_func_t' (aka 'int (*)(void *, void *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 83 | snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For addressing those errors, introduce wrapper functions that are used for callbacks and bridge to the actual function call with pointer cast. The code was originally added with the initial ALSA merge in linux-2.5.4. [ the patch description shamelessly copied from Arnd's original patch -- tiwai ] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213101020.459183-1-arnd@kernel.org Link: https://lore.kernel.org/r/20240213135343.16411-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_midi.c | 8 +++++++- sound/core/seq/seq_virmidi.c | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_midi.c b/sound/core/seq/seq_midi.c index 4589aac091542..b00bbf18a6f5d 100644 --- a/sound/core/seq/seq_midi.c +++ b/sound/core/seq/seq_midi.c @@ -112,6 +112,12 @@ static int dump_midi(struct snd_rawmidi_substream *substream, const char *buf, i return 0; } +/* callback for snd_seq_dump_var_event(), bridging to dump_midi() */ +static int __dump_midi(void *ptr, void *buf, int count) +{ + return dump_midi(ptr, buf, count); +} + static int event_process_midi(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { @@ -131,7 +137,7 @@ static int event_process_midi(struct snd_seq_event *ev, int direct, pr_debug("ALSA: seq_midi: invalid sysex event flags = 0x%x\n", ev->flags); return 0; } - snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)dump_midi, substream); + snd_seq_dump_var_event(ev, __dump_midi, substream); snd_midi_event_reset_decode(msynth->parser); } else { if (msynth->parser == NULL) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index f5cae49500c81..ffd8e7202c334 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -62,6 +62,13 @@ static void snd_virmidi_init_event(struct snd_virmidi *vmidi, /* * decode input event and put to read buffer of each opened file */ + +/* callback for snd_seq_dump_var_event(), bridging to snd_rawmidi_receive() */ +static int dump_to_rawmidi(void *ptr, void *buf, int count) +{ + return snd_rawmidi_receive(ptr, buf, count); +} + static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, struct snd_seq_event *ev, bool atomic) @@ -80,7 +87,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; - snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_seq_dump_var_event(ev, dump_to_rawmidi, vmidi->substream); snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); -- GitLab From 511a6a88b24af64786c93bb902bfb924a1c01ef1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 9 Feb 2024 12:49:46 -0800 Subject: [PATCH 0956/1418] perf stat: Avoid metric-only segv [ Upstream commit 2543947c77e0e224bda86b4e7220c2f6714da463 ] Cycles is recognized as part of a hard coded metric in stat-shadow.c, it may call print_metric_only with a NULL fmt string leading to a segfault. Handle the NULL fmt explicitly. Fixes: 088519f318be ("perf stat: Move the display functions to stat-display.c") Signed-off-by: Ian Rogers Reviewed-by: Kan Liang Cc: K Prateek Nayak Cc: James Clark Cc: Kaige Ye Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240209204947.3873294-4-irogers@google.com Signed-off-by: Sasha Levin --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bc866d18973e4..ef9a3df459657 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -366,7 +366,7 @@ static void print_metric_only(struct perf_stat_config *config, if (color) mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; - color_snprintf(str, sizeof(str), color ?: "", fmt, val); + color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val); fprintf(out, "%*s ", mlen, str); } -- GitLab From fe636b9a6ede6ebf705b9db6b7590c8d642a48e1 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 13 Feb 2024 22:58:03 +0100 Subject: [PATCH 0957/1418] ASoC: meson: aiu: fix function pointer type mismatch [ Upstream commit 98ac85a00f31d2e9d5452b825a9ed0153d934043 ] clang-16 warns about casting functions to incompatible types, as is done here to call clk_disable_unprepare: sound/soc/meson/aiu.c:243:12: error: cast from 'void (*)(struct clk *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 243 | (void(*)(void *))clk_disable_unprepare, The pattern of getting, enabling and setting a disable callback for a clock can be replaced with devm_clk_get_enabled(), which also fixes this warning. Fixes: 6ae9ca9ce986 ("ASoC: meson: aiu: add i2s and spdif support") Reported-by: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Justin Stitt Link: https://msgid.link/r/20240213215807.3326688-2-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/aiu.c | 19 ++++--------------- sound/soc/meson/aiu.h | 1 - 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/sound/soc/meson/aiu.c b/sound/soc/meson/aiu.c index 88e611e64d14f..077b9c0b6c4ca 100644 --- a/sound/soc/meson/aiu.c +++ b/sound/soc/meson/aiu.c @@ -218,11 +218,12 @@ static const char * const aiu_spdif_ids[] = { static int aiu_clk_get(struct device *dev) { struct aiu *aiu = dev_get_drvdata(dev); + struct clk *pclk; int ret; - aiu->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(aiu->pclk)) - return dev_err_probe(dev, PTR_ERR(aiu->pclk), "Can't get the aiu pclk\n"); + pclk = devm_clk_get_enabled(dev, "pclk"); + if (IS_ERR(pclk)) + return dev_err_probe(dev, PTR_ERR(pclk), "Can't get the aiu pclk\n"); aiu->spdif_mclk = devm_clk_get(dev, "spdif_mclk"); if (IS_ERR(aiu->spdif_mclk)) @@ -239,18 +240,6 @@ static int aiu_clk_get(struct device *dev) if (ret) return dev_err_probe(dev, ret, "Can't get the spdif clocks\n"); - ret = clk_prepare_enable(aiu->pclk); - if (ret) { - dev_err(dev, "peripheral clock enable failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - aiu->pclk); - if (ret) - dev_err(dev, "failed to add reset action on pclk"); - return ret; } diff --git a/sound/soc/meson/aiu.h b/sound/soc/meson/aiu.h index 393b6c2307e49..0f94c8bf60818 100644 --- a/sound/soc/meson/aiu.h +++ b/sound/soc/meson/aiu.h @@ -33,7 +33,6 @@ struct aiu_platform_data { }; struct aiu { - struct clk *pclk; struct clk *spdif_mclk; struct aiu_interface i2s; struct aiu_interface spdif; -- GitLab From 4a00001d22d1f5722d519da41fdddef6d3afdf99 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 13 Feb 2024 22:58:04 +0100 Subject: [PATCH 0958/1418] ASoC: meson: t9015: fix function pointer type mismatch [ Upstream commit 5ad992c71b6a8e8a547954addc7af9fbde6ca10a ] clang-16 warns about casting functions to incompatible types, as is done here to call clk_disable_unprepare: sound/soc/meson/t9015.c:274:4: error: cast from 'void (*)(struct clk *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 274 | (void(*)(void *))clk_disable_unprepare, The pattern of getting, enabling and setting a disable callback for a clock can be replaced with devm_clk_get_enabled(), which also fixes this warning. Fixes: 33901f5b9b16 ("ASoC: meson: add t9015 internal DAC driver") Reported-by: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Justin Stitt Link: https://msgid.link/r/20240213215807.3326688-3-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/t9015.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/sound/soc/meson/t9015.c b/sound/soc/meson/t9015.c index 9c6b4dac68932..571f65788c592 100644 --- a/sound/soc/meson/t9015.c +++ b/sound/soc/meson/t9015.c @@ -48,7 +48,6 @@ #define POWER_CFG 0x10 struct t9015 { - struct clk *pclk; struct regulator *avdd; }; @@ -249,6 +248,7 @@ static int t9015_probe(struct platform_device *pdev) struct t9015 *priv; void __iomem *regs; struct regmap *regmap; + struct clk *pclk; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -256,26 +256,14 @@ static int t9015_probe(struct platform_device *pdev) return -ENOMEM; platform_set_drvdata(pdev, priv); - priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get core clock\n"); + pclk = devm_clk_get_enabled(dev, "pclk"); + if (IS_ERR(pclk)) + return dev_err_probe(dev, PTR_ERR(pclk), "failed to get core clock\n"); priv->avdd = devm_regulator_get(dev, "AVDD"); if (IS_ERR(priv->avdd)) return dev_err_probe(dev, PTR_ERR(priv->avdd), "failed to AVDD\n"); - ret = clk_prepare_enable(priv->pclk); - if (ret) { - dev_err(dev, "core clock enable failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - priv->pclk); - if (ret) - return ret; - ret = device_reset(dev); if (ret) { dev_err(dev, "reset failed\n"); -- GitLab From b494caad9c6b19014da6aa6e53a0239bda25faad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Feb 2024 14:58:37 +0100 Subject: [PATCH 0959/1418] powerpc: Force inlining of arch_vmap_p{u/m}d_supported() [ Upstream commit c5aebb53b32460bc52680dd4e2a2f6b84d5ea521 ] arch_vmap_pud_supported() and arch_vmap_pmd_supported() are expected to constant-fold to false when RADIX is not enabled. Force inlining in order to avoid following failure which leads to unexpected call of non-existing pud_set_huge() and pmd_set_huge() on powerpc 8xx. In function 'pud_huge_tests', inlined from 'debug_vm_pgtable' at mm/debug_vm_pgtable.c:1399:2: ./arch/powerpc/include/asm/vmalloc.h:9:33: warning: inlining failed in call to 'arch_vmap_pud_supported.isra': call is unlikely and code size would grow [-Winline] 9 | #define arch_vmap_pud_supported arch_vmap_pud_supported | ^~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/vmalloc.h:10:20: note: in expansion of macro 'arch_vmap_pud_supported' 10 | static inline bool arch_vmap_pud_supported(pgprot_t prot) | ^~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/vmalloc.h:9:33: note: called from here 9 | #define arch_vmap_pud_supported arch_vmap_pud_supported mm/debug_vm_pgtable.c:458:14: note: in expansion of macro 'arch_vmap_pud_supported' 458 | if (!arch_vmap_pud_supported(args->page_prot) || | ^~~~~~~~~~~~~~~~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402131836.OU1TDuoi-lkp@intel.com/ Fixes: 8309c9d71702 ("powerpc: inline huge vmap supported functions") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/bbd84ad52bf377e8d3b5865a906f2dc5d99964ba.1707832677.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/vmalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h index 4c69ece52a31e..59ed89890c902 100644 --- a/arch/powerpc/include/asm/vmalloc.h +++ b/arch/powerpc/include/asm/vmalloc.h @@ -7,14 +7,14 @@ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #define arch_vmap_pud_supported arch_vmap_pud_supported -static inline bool arch_vmap_pud_supported(pgprot_t prot) +static __always_inline bool arch_vmap_pud_supported(pgprot_t prot) { /* HPT does not cope with large pages in the vmalloc area */ return radix_enabled(); } #define arch_vmap_pmd_supported arch_vmap_pmd_supported -static inline bool arch_vmap_pmd_supported(pgprot_t prot) +static __always_inline bool arch_vmap_pmd_supported(pgprot_t prot) { return radix_enabled(); } -- GitLab From 9a819504171d4500876d81b17237c6980e6f9334 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 20 Oct 2022 15:12:21 +0300 Subject: [PATCH 0960/1418] ASoC: SOF: Introduce container struct for SOF firmware [ Upstream commit 4f373ccf226e37a20fdc15a3df8034517a6045fd ] Move the firmware related information under a new struct (sof_firmware) and add it to the high level snd_sof_dev struct. Convert the generic code to use this new container when working with the basefw and for compatibility reasons set the old plat_data members used by the platforms. Signed-off-by: Peter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Chao Song Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20221020121238.18339-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Stable-dep-of: 98f681b0f84c ("ASoC: SOF: Add some bounds checking to firmware data") Signed-off-by: Sasha Levin --- sound/soc/sof/ipc3-loader.c | 26 ++++++++++++-------------- sound/soc/sof/ipc4-loader.c | 6 ++---- sound/soc/sof/loader.c | 18 +++++++++++++----- sound/soc/sof/sof-priv.h | 14 ++++++++++++++ 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/sound/soc/sof/ipc3-loader.c b/sound/soc/sof/ipc3-loader.c index bf423ca4e97bb..28218766d2114 100644 --- a/sound/soc/sof/ipc3-loader.c +++ b/sound/soc/sof/ipc3-loader.c @@ -138,8 +138,7 @@ static ssize_t ipc3_fw_ext_man_size(struct snd_sof_dev *sdev, const struct firmw static size_t sof_ipc3_fw_parse_ext_man(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; const struct sof_ext_man_elem_header *elem_hdr; const struct sof_ext_man_header *head; ssize_t ext_man_size; @@ -310,18 +309,18 @@ static int sof_ipc3_parse_module_memcpy(struct snd_sof_dev *sdev, static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + u32 payload_offset = sdev->basefw.payload_offset; + const struct firmware *fw = sdev->basefw.fw; struct snd_sof_fw_header *header; struct snd_sof_mod_hdr *module; int (*load_module)(struct snd_sof_dev *sof_dev, struct snd_sof_mod_hdr *hdr); size_t remaining; int ret, count; - if (!plat_data->fw) + if (!fw) return -EINVAL; - header = (struct snd_sof_fw_header *)(fw->data + plat_data->fw_offset); + header = (struct snd_sof_fw_header *)(fw->data + payload_offset); load_module = sof_ops(sdev)->load_module; if (!load_module) { dev_dbg(sdev->dev, "Using generic module loading\n"); @@ -331,9 +330,8 @@ static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) } /* parse each module */ - module = (struct snd_sof_mod_hdr *)(fw->data + plat_data->fw_offset + - sizeof(*header)); - remaining = fw->size - sizeof(*header) - plat_data->fw_offset; + module = (struct snd_sof_mod_hdr *)(fw->data + payload_offset + sizeof(*header)); + remaining = fw->size - sizeof(*header) - payload_offset; /* check for wrap */ if (remaining > fw->size) { dev_err(sdev->dev, "%s: fw size smaller than header size\n", __func__); @@ -374,19 +372,19 @@ static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) static int sof_ipc3_validate_firmware(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + u32 payload_offset = sdev->basefw.payload_offset; + const struct firmware *fw = sdev->basefw.fw; struct snd_sof_fw_header *header; - size_t fw_size = fw->size - plat_data->fw_offset; + size_t fw_size = fw->size - payload_offset; - if (fw->size <= plat_data->fw_offset) { + if (fw->size <= payload_offset) { dev_err(sdev->dev, "firmware size must be greater than firmware offset\n"); return -EINVAL; } /* Read the header information from the data pointer */ - header = (struct snd_sof_fw_header *)(fw->data + plat_data->fw_offset); + header = (struct snd_sof_fw_header *)(fw->data + payload_offset); /* verify FW sig */ if (strncmp(header->sig, SND_SOF_FW_SIG, SND_SOF_FW_SIG_SIZE) != 0) { diff --git a/sound/soc/sof/ipc4-loader.c b/sound/soc/sof/ipc4-loader.c index e635ae515fa9f..9f433e9b4cd37 100644 --- a/sound/soc/sof/ipc4-loader.c +++ b/sound/soc/sof/ipc4-loader.c @@ -17,9 +17,8 @@ static size_t sof_ipc4_fw_parse_ext_man(struct snd_sof_dev *sdev) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; - struct snd_sof_pdata *plat_data = sdev->pdata; struct sof_man4_fw_binary_header *fw_header; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; struct sof_ext_manifest4_hdr *ext_man_hdr; struct sof_man4_module_config *fm_config; struct sof_ipc4_fw_module *fw_module; @@ -138,9 +137,8 @@ static int sof_ipc4_validate_firmware(struct snd_sof_dev *sdev) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; u32 fw_hdr_offset = ipc4_data->manifest_fw_hdr_offset; - struct snd_sof_pdata *plat_data = sdev->pdata; struct sof_man4_fw_binary_header *fw_header; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; struct sof_ext_manifest4_hdr *ext_man_hdr; ext_man_hdr = (struct sof_ext_manifest4_hdr *)fw->data; diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index 5f51d936b3067..ba8e3aae0a5cb 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -22,7 +22,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) int ret; /* Don't request firmware again if firmware is already requested */ - if (plat_data->fw) + if (sdev->basefw.fw) return 0; fw_filename = kasprintf(GFP_KERNEL, "%s/%s", @@ -31,7 +31,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) if (!fw_filename) return -ENOMEM; - ret = request_firmware(&plat_data->fw, fw_filename, sdev->dev); + ret = request_firmware(&sdev->basefw.fw, fw_filename, sdev->dev); if (ret < 0) { dev_err(sdev->dev, @@ -48,7 +48,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) ext_man_size = sdev->ipc->ops->fw_loader->parse_ext_manifest(sdev); if (ext_man_size > 0) { /* when no error occurred, drop extended manifest */ - plat_data->fw_offset = ext_man_size; + sdev->basefw.payload_offset = ext_man_size; } else if (!ext_man_size) { /* No extended manifest, so nothing to skip during FW load */ dev_dbg(sdev->dev, "firmware doesn't contain extended manifest\n"); @@ -58,6 +58,12 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) fw_filename, ret); } + /* + * Until the platform code is switched to use the new container the fw + * and payload offset must be set in plat_data + */ + plat_data->fw = sdev->basefw.fw; + plat_data->fw_offset = sdev->basefw.payload_offset; err: kfree(fw_filename); @@ -100,7 +106,8 @@ int snd_sof_load_firmware_memcpy(struct snd_sof_dev *sdev) return 0; error: - release_firmware(plat_data->fw); + release_firmware(sdev->basefw.fw); + sdev->basefw.fw = NULL; plat_data->fw = NULL; return ret; @@ -185,7 +192,8 @@ EXPORT_SYMBOL(snd_sof_run_firmware); void snd_sof_fw_unload(struct snd_sof_dev *sdev) { /* TODO: support module unloading at runtime */ - release_firmware(sdev->pdata->fw); + release_firmware(sdev->basefw.fw); + sdev->basefw.fw = NULL; sdev->pdata->fw = NULL; } EXPORT_SYMBOL(snd_sof_fw_unload); diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h index de08825915b35..3d70b57e4864d 100644 --- a/sound/soc/sof/sof-priv.h +++ b/sound/soc/sof/sof-priv.h @@ -136,6 +136,17 @@ struct snd_sof_platform_stream_params { bool cont_update_posn; }; +/** + * struct sof_firmware - Container struct for SOF firmware + * @fw: Pointer to the firmware + * @payload_offset: Offset of the data within the loaded firmware image to be + * loaded to the DSP (skipping for example ext_manifest section) + */ +struct sof_firmware { + const struct firmware *fw; + u32 payload_offset; +}; + /* * SOF DSP HW abstraction operations. * Used to abstract DSP HW architecture and any IO busses between host CPU @@ -487,6 +498,9 @@ struct snd_sof_dev { spinlock_t ipc_lock; /* lock for IPC users */ spinlock_t hw_lock; /* lock for HW IO access */ + /* Main, Base firmware image */ + struct sof_firmware basefw; + /* * ASoC components. plat_drv fields are set dynamically so * can't use const -- GitLab From d133d67e7e724102d1e53009c4f88afaaf3e167c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Feb 2024 16:02:16 +0300 Subject: [PATCH 0961/1418] ASoC: SOF: Add some bounds checking to firmware data [ Upstream commit 98f681b0f84cfc3a1d83287b77697679e0398306 ] Smatch complains about "head->full_size - head->header_size" can underflow. To some extent, we're always going to have to trust the firmware a bit. However, it's easy enough to add a check for negatives, and let's add a upper bounds check as well. Fixes: d2458baa799f ("ASoC: SOF: ipc3-loader: Implement firmware parsing and loading") Signed-off-by: Dan Carpenter Link: https://msgid.link/r/5593d147-058c-4de3-a6f5-540ecb96f6f8@moroto.mountain Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/ipc3-loader.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/ipc3-loader.c b/sound/soc/sof/ipc3-loader.c index 28218766d2114..6e3ef06721106 100644 --- a/sound/soc/sof/ipc3-loader.c +++ b/sound/soc/sof/ipc3-loader.c @@ -148,6 +148,8 @@ static size_t sof_ipc3_fw_parse_ext_man(struct snd_sof_dev *sdev) head = (struct sof_ext_man_header *)fw->data; remaining = head->full_size - head->header_size; + if (remaining < 0 || remaining > sdev->basefw.fw->size) + return -EINVAL; ext_man_size = ipc3_fw_ext_man_size(sdev, fw); /* Assert firmware starts with extended manifest */ -- GitLab From 298f7f137021e2d38affc5d163fa64001c199760 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Wed, 9 Nov 2022 17:28:52 +0800 Subject: [PATCH 0962/1418] NTB: EPF: fix possible memory leak in pci_vntb_probe() [ Upstream commit 956578e3d397e00d6254dc7b5194d28587f98518 ] As ntb_register_device() don't handle error of device_register(), if ntb_register_device() returns error in pci_vntb_probe(), name of kobject which is allocated in dev_set_name() called in device_add() is leaked. As comment of device_add() says, it should call put_device() to drop the reference count that was set in device_initialize() when it fails, so the name can be freed in kobject_cleanup(). Signed-off-by: ruanjinjie Signed-off-by: Jon Mason Stable-dep-of: aebfdfe39b93 ("NTB: fix possible name leak in ntb_register_device()") Signed-off-by: Sasha Levin --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 8c6931210ac4d..cd985a41c8d65 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -1288,6 +1288,7 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_register_dev: + put_device(&ndev->ntb.dev); return -EINVAL; } -- GitLab From 6632a54ac8057cc0b0d789c6f73883e871bcd25c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 1 Dec 2023 11:30:56 +0800 Subject: [PATCH 0963/1418] NTB: fix possible name leak in ntb_register_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aebfdfe39b9327a3077d0df8db3beb3160c9bdd0 ] If device_register() fails in ntb_register_device(), the device name allocated by dev_set_name() should be freed. As per the comment in device_register(), callers should use put_device() to give up the reference in the error path. So fix this by calling put_device() in the error path so that the name can be freed in kobject_cleanup(). As a result of this, put_device() in the error path of ntb_register_device() is removed and the actual error is returned. Fixes: a1bd3baeb2f1 ("NTB: Add NTB hardware abstraction layer") Signed-off-by: Yang Yingliang Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231201033057.1399131-1-yangyingliang@huaweicloud.com [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- drivers/ntb/core.c | 8 +++++++- drivers/pci/endpoint/functions/pci-epf-vntb.c | 6 +----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/core.c b/drivers/ntb/core.c index 27dd93deff6e5..d702bee780826 100644 --- a/drivers/ntb/core.c +++ b/drivers/ntb/core.c @@ -100,6 +100,8 @@ EXPORT_SYMBOL(ntb_unregister_client); int ntb_register_device(struct ntb_dev *ntb) { + int ret; + if (!ntb) return -EINVAL; if (!ntb->pdev) @@ -120,7 +122,11 @@ int ntb_register_device(struct ntb_dev *ntb) ntb->ctx_ops = NULL; spin_lock_init(&ntb->ctx_lock); - return device_register(&ntb->dev); + ret = device_register(&ntb->dev); + if (ret) + put_device(&ntb->dev); + + return ret; } EXPORT_SYMBOL(ntb_register_device); diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index cd985a41c8d65..b4c1a4f6029d4 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -1281,15 +1281,11 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = ntb_register_device(&ndev->ntb); if (ret) { dev_err(dev, "Failed to register NTB device\n"); - goto err_register_dev; + return ret; } dev_dbg(dev, "PCI Virtual NTB driver loaded\n"); return 0; - -err_register_dev: - put_device(&ndev->ntb.dev); - return -EINVAL; } static struct pci_device_id pci_vntb_table[] = { -- GitLab From 8006813ebdf123583f35db41abcfacfc0c671844 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 19 Oct 2022 19:45:50 +0200 Subject: [PATCH 0964/1418] media: cedrus: h265: Associate mv col buffers with buffer [ Upstream commit 0ee952c2f484ee0059f7ce4951aaa3cb0eda96dd ] Currently mv col aux buffers are allocated as a pool. This is not optimal because pool size equals number of buffers before stream is started. Buffers can easily be allocated afterwards. In such cases, invalid pointer is assigned to the decoding frame and Cedrus might overwrite memory location which is allocated to different task. Solve this issue with allocating mv col buffer once capture buffer is actually used. Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Stable-dep-of: 3a11887f7f11 ("media: cedrus: h265: Fix configuring bitstream size") Signed-off-by: Sasha Levin --- drivers/staging/media/sunxi/cedrus/cedrus.h | 9 +-- .../staging/media/sunxi/cedrus/cedrus_h265.c | 63 ++++++++++--------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h index 93a2196006f73..cb99610f3e128 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h @@ -109,6 +109,11 @@ struct cedrus_buffer { unsigned int position; enum cedrus_h264_pic_type pic_type; } h264; + struct { + void *mv_col_buf; + dma_addr_t mv_col_buf_dma; + ssize_t mv_col_buf_size; + } h265; } codec; }; @@ -142,10 +147,6 @@ struct cedrus_ctx { ssize_t intra_pred_buf_size; } h264; struct { - void *mv_col_buf; - dma_addr_t mv_col_buf_addr; - ssize_t mv_col_buf_size; - ssize_t mv_col_buf_unit_size; void *neighbor_info_buf; dma_addr_t neighbor_info_buf_addr; void *entry_points_buf; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 625f77a8c5bde..7a438cd22c341 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -90,12 +90,13 @@ static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data, } static inline dma_addr_t -cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx, - unsigned int index, unsigned int field) +cedrus_h265_frame_info_mv_col_buf_addr(struct vb2_buffer *buf, + unsigned int field) { - return ctx->codec.h265.mv_col_buf_addr + index * - ctx->codec.h265.mv_col_buf_unit_size + - field * ctx->codec.h265.mv_col_buf_unit_size / 2; + struct cedrus_buffer *cedrus_buf = vb2_to_cedrus_buffer(buf); + + return cedrus_buf->codec.h265.mv_col_buf_dma + + field * cedrus_buf->codec.h265.mv_col_buf_size / 2; } static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, @@ -108,9 +109,8 @@ static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buf, 0); dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buf, 1); dma_addr_t mv_col_buf_addr[2] = { - cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, 0), - cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, - field_pic ? 1 : 0) + cedrus_h265_frame_info_mv_col_buf_addr(buf, 0), + cedrus_h265_frame_info_mv_col_buf_addr(buf, field_pic ? 1 : 0) }; u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO + VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index; @@ -412,6 +412,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) unsigned int width_in_ctb_luma, ctb_size_luma; unsigned int log2_max_luma_coding_block_size; unsigned int ctb_addr_x, ctb_addr_y; + struct cedrus_buffer *cedrus_buf; dma_addr_t src_buf_addr; dma_addr_t src_buf_end_addr; u32 chroma_log2_weight_denom; @@ -428,6 +429,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) decode_params = run->h265.decode_params; pred_weight_table = &slice_params->pred_weight_table; num_entry_point_offsets = slice_params->num_entry_point_offsets; + cedrus_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); /* * If entry points offsets are present, we should get them @@ -445,31 +447,25 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma); /* MV column buffer size and allocation. */ - if (!ctx->codec.h265.mv_col_buf_size) { - unsigned int num_buffers = - run->dst->vb2_buf.vb2_queue->num_buffers; - + if (!cedrus_buf->codec.h265.mv_col_buf_size) { /* * Each CTB requires a MV col buffer with a specific unit size. * Since the address is given with missing lsb bits, 1 KiB is * added to each buffer to ensure proper alignment. */ - ctx->codec.h265.mv_col_buf_unit_size = + cedrus_buf->codec.h265.mv_col_buf_size = DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) * DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) * CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K; - ctx->codec.h265.mv_col_buf_size = num_buffers * - ctx->codec.h265.mv_col_buf_unit_size; - /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ - ctx->codec.h265.mv_col_buf = + cedrus_buf->codec.h265.mv_col_buf = dma_alloc_attrs(dev->dev, - ctx->codec.h265.mv_col_buf_size, - &ctx->codec.h265.mv_col_buf_addr, + cedrus_buf->codec.h265.mv_col_buf_size, + &cedrus_buf->codec.h265.mv_col_buf_dma, GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); - if (!ctx->codec.h265.mv_col_buf) { - ctx->codec.h265.mv_col_buf_size = 0; + if (!cedrus_buf->codec.h265.mv_col_buf) { + cedrus_buf->codec.h265.mv_col_buf_size = 0; return -ENOMEM; } } @@ -816,9 +812,6 @@ static int cedrus_h265_start(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; - /* The buffer size is calculated at setup time. */ - ctx->codec.h265.mv_col_buf_size = 0; - /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ ctx->codec.h265.neighbor_info_buf = dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, @@ -845,14 +838,24 @@ static int cedrus_h265_start(struct cedrus_ctx *ctx) static void cedrus_h265_stop(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; + struct cedrus_buffer *buf; + struct vb2_queue *vq; + unsigned int i; - if (ctx->codec.h265.mv_col_buf_size > 0) { - dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size, - ctx->codec.h265.mv_col_buf, - ctx->codec.h265.mv_col_buf_addr, - DMA_ATTR_NO_KERNEL_MAPPING); + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + for (i = 0; i < vq->num_buffers; i++) { + buf = vb2_to_cedrus_buffer(vb2_get_buffer(vq, i)); - ctx->codec.h265.mv_col_buf_size = 0; + if (buf->codec.h265.mv_col_buf_size > 0) { + dma_free_attrs(dev->dev, + buf->codec.h265.mv_col_buf_size, + buf->codec.h265.mv_col_buf, + buf->codec.h265.mv_col_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + + buf->codec.h265.mv_col_buf_size = 0; + } } dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, -- GitLab From acdf24f3d5232a86b81f7a14bd2998a78cb84843 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:09:25 +0100 Subject: [PATCH 0965/1418] media: cedrus: h265: Fix configuring bitstream size [ Upstream commit 3a11887f7f11a6bb1f05e7f67b3ea20dadfec443 ] bit_size field holds size of slice, not slice + header. Because of HW quirks, driver can't program in just slice, but also preceding header. But that means that currently used bit_size is wrong (too small). Instead, just use size of whole buffer. There is no harm in doing this. Fixes: 86caab29da78 ("media: cedrus: Add HEVC/H.265 decoding support") Suggested-by: Paul Kocialkowski Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 7a438cd22c341..9f13c942a806b 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -414,11 +414,11 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) unsigned int ctb_addr_x, ctb_addr_y; struct cedrus_buffer *cedrus_buf; dma_addr_t src_buf_addr; - dma_addr_t src_buf_end_addr; u32 chroma_log2_weight_denom; u32 num_entry_point_offsets; u32 output_pic_list_index; u32 pic_order_cnt[2]; + size_t slice_bytes; u8 padding; int count; u32 reg; @@ -430,6 +430,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) pred_weight_table = &slice_params->pred_weight_table; num_entry_point_offsets = slice_params->num_entry_point_offsets; cedrus_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); + slice_bytes = vb2_get_plane_payload(&run->src->vb2_buf, 0); /* * If entry points offsets are present, we should get them @@ -477,7 +478,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0); - reg = slice_params->bit_size; + reg = slice_bytes * 8; cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg); /* Source beginning and end addresses. */ @@ -491,10 +492,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg); - src_buf_end_addr = src_buf_addr + - DIV_ROUND_UP(slice_params->bit_size, 8); - - reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr); + reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_addr + slice_bytes); cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg); /* Coding tree block address */ -- GitLab From 0dfdf4c1ac4cfbbbeb5be2cd8b2148f7ff8e3817 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:34:20 +0100 Subject: [PATCH 0966/1418] media: sun8i-di: Fix coefficient writes [ Upstream commit 794b581f8c6eb7b60fe468ccb96dd3cd38ff779f ] Currently coefficients are applied only once, since they don't change. However, this is done before enable bit is set and thus it doesn't get applied properly. Fix that by applying coefficients after enable bit is set. While this means that it will be done evey time, it doesn't bring much time penalty. Fixes: a4260ea49547 ("media: sun4i: Add H3 deinterlace driver") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../media/platform/sunxi/sun8i-di/sun8i-di.c | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index aa65d70b6270a..3e58de58cd89d 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -66,6 +66,7 @@ static void deinterlace_device_run(void *priv) struct vb2_v4l2_buffer *src, *dst; unsigned int hstep, vstep; dma_addr_t addr; + int i; src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); @@ -160,6 +161,26 @@ static void deinterlace_device_run(void *priv) deinterlace_write(dev, DEINTERLACE_CH1_HORZ_FACT, hstep); deinterlace_write(dev, DEINTERLACE_CH1_VERT_FACT, vstep); + /* neutral filter coefficients */ + deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL, + DEINTERLACE_FRM_CTRL_COEF_ACCESS); + readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val, + val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40); + + for (i = 0; i < 32; i++) { + deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4, + DEINTERLACE_IDENTITY_COEF); + } + + deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL, + DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0); + deinterlace_clr_set_bits(dev, DEINTERLACE_FIELD_CTRL, DEINTERLACE_FIELD_CTRL_FIELD_CNT_MSK, DEINTERLACE_FIELD_CTRL_FIELD_CNT(ctx->field)); @@ -248,7 +269,6 @@ static irqreturn_t deinterlace_irq(int irq, void *data) static void deinterlace_init(struct deinterlace_dev *dev) { u32 val; - int i; deinterlace_write(dev, DEINTERLACE_BYPASS, DEINTERLACE_BYPASS_CSC); @@ -285,26 +305,6 @@ static void deinterlace_init(struct deinterlace_dev *dev) deinterlace_clr_set_bits(dev, DEINTERLACE_CHROMA_DIFF, DEINTERLACE_CHROMA_DIFF_TH_MSK, DEINTERLACE_CHROMA_DIFF_TH(5)); - - /* neutral filter coefficients */ - deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL, - DEINTERLACE_FRM_CTRL_COEF_ACCESS); - readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val, - val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40); - - for (i = 0; i < 32; i++) { - deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4, - DEINTERLACE_IDENTITY_COEF); - } - - deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL, - DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0); } static inline struct deinterlace_ctx *deinterlace_file2ctx(struct file *file) -- GitLab From f4258e5882e299a20ef723007d5cb9ef29a41e3a Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:34:21 +0100 Subject: [PATCH 0967/1418] media: sun8i-di: Fix power on/off sequences [ Upstream commit cff104e33bad38f4b2c8d58816a7accfaa2879f9 ] According to user manual, reset line should be deasserted before clocks are enabled. Also fix power down sequence to be reverse of that. Fixes: a4260ea49547 ("media: sun4i: Add H3 deinterlace driver") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../media/platform/sunxi/sun8i-di/sun8i-di.c | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index 3e58de58cd89d..01b44dd708bd3 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -931,11 +931,18 @@ static int deinterlace_runtime_resume(struct device *device) return ret; } + ret = reset_control_deassert(dev->rstc); + if (ret) { + dev_err(dev->dev, "Failed to apply reset\n"); + + goto err_exclusive_rate; + } + ret = clk_prepare_enable(dev->bus_clk); if (ret) { dev_err(dev->dev, "Failed to enable bus clock\n"); - goto err_exclusive_rate; + goto err_rst; } ret = clk_prepare_enable(dev->mod_clk); @@ -952,23 +959,16 @@ static int deinterlace_runtime_resume(struct device *device) goto err_mod_clk; } - ret = reset_control_deassert(dev->rstc); - if (ret) { - dev_err(dev->dev, "Failed to apply reset\n"); - - goto err_ram_clk; - } - deinterlace_init(dev); return 0; -err_ram_clk: - clk_disable_unprepare(dev->ram_clk); err_mod_clk: clk_disable_unprepare(dev->mod_clk); err_bus_clk: clk_disable_unprepare(dev->bus_clk); +err_rst: + reset_control_assert(dev->rstc); err_exclusive_rate: clk_rate_exclusive_put(dev->mod_clk); @@ -979,11 +979,12 @@ static int deinterlace_runtime_suspend(struct device *device) { struct deinterlace_dev *dev = dev_get_drvdata(device); - reset_control_assert(dev->rstc); - clk_disable_unprepare(dev->ram_clk); clk_disable_unprepare(dev->mod_clk); clk_disable_unprepare(dev->bus_clk); + + reset_control_assert(dev->rstc); + clk_rate_exclusive_put(dev->mod_clk); return 0; -- GitLab From 77301ad7bebf00f60d01decc12c51d27321acbb8 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:34:22 +0100 Subject: [PATCH 0968/1418] media: sun8i-di: Fix chroma difference threshold [ Upstream commit 856525e8db272b0ce6d9c6e6c2eeb97892b485a6 ] While there is no good explanation what this value does, vendor driver uses value 31 for it. Align driver with it. Fixes: a4260ea49547 ("media: sun4i: Add H3 deinterlace driver") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/sunxi/sun8i-di/sun8i-di.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index 01b44dd708bd3..7a2f558c981db 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -304,7 +304,7 @@ static void deinterlace_init(struct deinterlace_dev *dev) deinterlace_clr_set_bits(dev, DEINTERLACE_CHROMA_DIFF, DEINTERLACE_CHROMA_DIFF_TH_MSK, - DEINTERLACE_CHROMA_DIFF_TH(5)); + DEINTERLACE_CHROMA_DIFF_TH(31)); } static inline struct deinterlace_ctx *deinterlace_file2ctx(struct file *file) -- GitLab From 8df9a3c7044b847e9c4dc7e683fd64c6b873f328 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 31 Jan 2024 13:00:33 +0100 Subject: [PATCH 0969/1418] media: imx: csc/scaler: fix v4l2_ctrl_handler memory leak [ Upstream commit 4797a3dd46f220e6d83daf54d70c5b33db6deb01 ] Free the memory allocated in v4l2_ctrl_handler_init on release. Fixes: a8ef0488cc59 ("media: imx: add csc/scaler mem2mem device") Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/imx/imx-media-csc-scaler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c index 1fd39a2fca98a..95cca281e8a37 100644 --- a/drivers/staging/media/imx/imx-media-csc-scaler.c +++ b/drivers/staging/media/imx/imx-media-csc-scaler.c @@ -803,6 +803,7 @@ static int ipu_csc_scaler_release(struct file *file) dev_dbg(priv->dev, "Releasing instance %p\n", ctx); + v4l2_ctrl_handler_free(&ctx->ctrl_hdlr); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); -- GitLab From 8e6e28e650bac0eec2f18f022dd830a4b76daf53 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Sun, 11 Feb 2024 07:07:05 -0800 Subject: [PATCH 0970/1418] media: go7007: add check of return value of go7007_read_addr() [ Upstream commit 0b70530ee740861f4776ff724fcc25023df1799a ] If go7007_read_addr() returns error channel is not assigned a value. In this case go to allocfail. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 866b8695d67e ("Staging: add the go7007 video driver") Signed-off-by: Daniil Dulov Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/go7007/go7007-usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/go7007/go7007-usb.c b/drivers/media/usb/go7007/go7007-usb.c index eeb85981e02b6..762c13e49bfa5 100644 --- a/drivers/media/usb/go7007/go7007-usb.c +++ b/drivers/media/usb/go7007/go7007-usb.c @@ -1201,7 +1201,9 @@ static int go7007_usb_probe(struct usb_interface *intf, u16 channel; /* read channel number from GPIO[1:0] */ - go7007_read_addr(go, 0x3c81, &channel); + if (go7007_read_addr(go, 0x3c81, &channel)) + goto allocfail; + channel &= 0x3; go->board_id = GO7007_BOARDID_ADLINK_MPG24; usb->board = board = &board_adlink_mpg24; -- GitLab From 7dee677d17c9a38d5ae4853e19ee79eb9ed8a997 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Sun, 11 Feb 2024 07:07:25 -0800 Subject: [PATCH 0971/1418] media: pvrusb2: remove redundant NULL check [ Upstream commit 95ac1210fb2753f968ebce0730d4fbc553c2a3dc ] Pointer dip->stream cannot be NULL due to a shift, thus remove redundant NULL check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c74e0062684b ("V4L/DVB (5059): Pvrusb2: Be smarter about mode restoration") Signed-off-by: Daniil Dulov Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-v4l2.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index c04ab7258d645..d195bd6a2248b 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -1198,11 +1198,6 @@ static void pvr2_v4l2_dev_init(struct pvr2_v4l2_dev *dip, dip->minor_type = pvr2_v4l_type_video; nr_ptr = video_nr; caps |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_AUDIO; - if (!dip->stream) { - pr_err(KBUILD_MODNAME - ": Failed to set up pvrusb2 v4l video dev due to missing stream instance\n"); - return; - } break; case VFL_TYPE_VBI: dip->config = pvr2_config_vbi; -- GitLab From 6e93e0f8832223769e61ed4accd8247da5ed839e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:04:27 +0100 Subject: [PATCH 0972/1418] media: pvrusb2: fix pvr2_stream_callback casts [ Upstream commit 30baa4a96b23add91a87305baaeba82c4e109e1f ] clang-16 complains about a control flow integrity (KCFI) issue in pvrusb2, which casts three different prototypes into pvr2_stream_callback: drivers/media/usb/pvrusb2/pvrusb2-v4l2.c:1070:30: error: cast from 'void (*)(struct pvr2_v4l2_fh *)' to 'pvr2_stream_callback' (aka 'void (*)(void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1070 | pvr2_stream_set_callback(sp,(pvr2_stream_callback)pvr2_v4l2_notify,fh); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/usb/pvrusb2/pvrusb2-context.c:110:6: error: cast from 'void (*)(struct pvr2_context *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 110 | (void (*)(void *))pvr2_context_notify, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/usb/pvrusb2/pvrusb2-dvb.c:152:6: error: cast from 'void (*)(struct pvr2_dvb_adapter *)' to 'pvr2_stream_callback' (aka 'void (*)(void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 152 | (pvr2_stream_callback) pvr2_dvb_notify, adap); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change the functions to actually take a void* argument so the cast is no longer needed. Fixes: bb8ce9d9143c ("V4L/DVB (7682): pvrusb2-dvb: finish up stream & buffer handling") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-context.c | 8 ++++---- drivers/media/usb/pvrusb2/pvrusb2-dvb.c | 6 ++++-- drivers/media/usb/pvrusb2/pvrusb2-v4l2.c | 6 ++++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-context.c b/drivers/media/usb/pvrusb2/pvrusb2-context.c index 1764674de98bc..58f2f3ff10ee2 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-context.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-context.c @@ -90,8 +90,10 @@ static void pvr2_context_destroy(struct pvr2_context *mp) } -static void pvr2_context_notify(struct pvr2_context *mp) +static void pvr2_context_notify(void *ptr) { + struct pvr2_context *mp = ptr; + pvr2_context_set_notify(mp,!0); } @@ -106,9 +108,7 @@ static void pvr2_context_check(struct pvr2_context *mp) pvr2_trace(PVR2_TRACE_CTXT, "pvr2_context %p (initialize)", mp); /* Finish hardware initialization */ - if (pvr2_hdw_initialize(mp->hdw, - (void (*)(void *))pvr2_context_notify, - mp)) { + if (pvr2_hdw_initialize(mp->hdw, pvr2_context_notify, mp)) { mp->video_stream.stream = pvr2_hdw_get_video_stream(mp->hdw); /* Trigger interface initialization. By doing this diff --git a/drivers/media/usb/pvrusb2/pvrusb2-dvb.c b/drivers/media/usb/pvrusb2/pvrusb2-dvb.c index 26811efe0fb58..9a9bae21c6147 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-dvb.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-dvb.c @@ -88,8 +88,10 @@ static int pvr2_dvb_feed_thread(void *data) return stat; } -static void pvr2_dvb_notify(struct pvr2_dvb_adapter *adap) +static void pvr2_dvb_notify(void *ptr) { + struct pvr2_dvb_adapter *adap = ptr; + wake_up(&adap->buffer_wait_data); } @@ -149,7 +151,7 @@ static int pvr2_dvb_stream_do_start(struct pvr2_dvb_adapter *adap) } pvr2_stream_set_callback(pvr->video_stream.stream, - (pvr2_stream_callback) pvr2_dvb_notify, adap); + pvr2_dvb_notify, adap); ret = pvr2_stream_set_buffer_count(stream, PVR2_DVB_BUFFER_COUNT); if (ret < 0) return ret; diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index d195bd6a2248b..d608b793fa847 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -1033,8 +1033,10 @@ static int pvr2_v4l2_open(struct file *file) } -static void pvr2_v4l2_notify(struct pvr2_v4l2_fh *fhp) +static void pvr2_v4l2_notify(void *ptr) { + struct pvr2_v4l2_fh *fhp = ptr; + wake_up(&fhp->wait_data); } @@ -1067,7 +1069,7 @@ static int pvr2_v4l2_iosetup(struct pvr2_v4l2_fh *fh) hdw = fh->channel.mc_head->hdw; sp = fh->pdi->stream->stream; - pvr2_stream_set_callback(sp,(pvr2_stream_callback)pvr2_v4l2_notify,fh); + pvr2_stream_set_callback(sp, pvr2_v4l2_notify, fh); pvr2_hdw_set_stream_type(hdw,fh->pdi->config); if ((ret = pvr2_hdw_set_streaming(hdw,!0)) < 0) return ret; return pvr2_ioread_set_enabled(fh->rhp,!0); -- GitLab From 2101966b774499da993984a4b07c7c6c6ea45fdd Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 3 Jan 2024 21:20:18 +0100 Subject: [PATCH 0973/1418] clk: qcom: dispcc-sdm845: Adjust internal GDSC wait times [ Upstream commit 117e7dc697c2739d754db8fe0c1e2d4f1f5d5f82 ] SDM845 downstream uses non-default values for GDSC internal waits. Program them accordingly to avoid surprises. Fixes: 81351776c9fb ("clk: qcom: Add display clock controller driver for SDM845") Signed-off-by: Konrad Dybcio Tested-by: Caleb Connolly # OnePlus 6 Link: https://lore.kernel.org/r/20240103-topic-845gdsc-v1-1-368efbe1a61d@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sdm845.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/dispcc-sdm845.c b/drivers/clk/qcom/dispcc-sdm845.c index 735adfefc3798..e792e0b130d33 100644 --- a/drivers/clk/qcom/dispcc-sdm845.c +++ b/drivers/clk/qcom/dispcc-sdm845.c @@ -759,6 +759,8 @@ static struct clk_branch disp_cc_mdss_vsync_clk = { static struct gdsc mdss_gdsc = { .gdscr = 0x3000, + .en_few_wait_val = 0x6, + .en_rest_wait_val = 0x5, .pd = { .name = "mdss_gdsc", }, -- GitLab From 7a08ebcfd7213e948f77fc3f7245416ddb3132d4 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 15 Feb 2024 09:53:09 +0100 Subject: [PATCH 0974/1418] drm/mediatek: dsi: Fix DSI RGB666 formats and definitions [ Upstream commit fae6f815505301b92d9113764f4d76d0bfe45607 ] The register bits definitions for RGB666 formats are wrong in multiple ways: first, in the DSI_PS_SEL bits region, the Packed 18-bits RGB666 format is selected with bit 1, while the Loosely Packed one is bit 2, and second - the definition name "LOOSELY_PS_18BIT_RGB666" is wrong because the loosely packed format is 24 bits instead! Either way, functions mtk_dsi_ps_control_vact() and mtk_dsi_ps_control() do not even agree on the DSI_PS_SEL bit to set in DSI_PSCTRL: one sets loosely packed (24) on RGB666, the other sets packed (18), and the other way around for RGB666_PACKED. Fixing this entire stack of issues is done in one go: - Use the correct bit for the Loosely Packed RGB666 definition - Rename LOOSELY_PS_18BIT_RGB666 to LOOSELY_PS_24BIT_RGB666 - Change ps_bpp_mode in mtk_dsi_ps_control_vact() to set: - Loosely Packed, 24-bits for MIPI_DSI_FMT_RGB666 - Packed, 18-bits for MIPI_DSI_FMT_RGB666_PACKED Fixes: 2e54c14e310f ("drm/mediatek: Add DSI sub driver") Reviewed-by: Alexandre Mergnat Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20240215085316.56835-3-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_dsi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 3e74c7c1b89fa..d871b1dba083d 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -70,8 +70,8 @@ #define DSI_PS_WC 0x3fff #define DSI_PS_SEL (3 << 16) #define PACKED_PS_16BIT_RGB565 (0 << 16) -#define LOOSELY_PS_18BIT_RGB666 (1 << 16) -#define PACKED_PS_18BIT_RGB666 (2 << 16) +#define PACKED_PS_18BIT_RGB666 (1 << 16) +#define LOOSELY_PS_24BIT_RGB666 (2 << 16) #define PACKED_PS_24BIT_RGB888 (3 << 16) #define DSI_VSA_NL 0x20 @@ -366,10 +366,10 @@ static void mtk_dsi_ps_control_vact(struct mtk_dsi *dsi) ps_bpp_mode |= PACKED_PS_24BIT_RGB888; break; case MIPI_DSI_FMT_RGB666: - ps_bpp_mode |= PACKED_PS_18BIT_RGB666; + ps_bpp_mode |= LOOSELY_PS_24BIT_RGB666; break; case MIPI_DSI_FMT_RGB666_PACKED: - ps_bpp_mode |= LOOSELY_PS_18BIT_RGB666; + ps_bpp_mode |= PACKED_PS_18BIT_RGB666; break; case MIPI_DSI_FMT_RGB565: ps_bpp_mode |= PACKED_PS_16BIT_RGB565; @@ -423,7 +423,7 @@ static void mtk_dsi_ps_control(struct mtk_dsi *dsi) dsi_tmp_buf_bpp = 3; break; case MIPI_DSI_FMT_RGB666: - tmp_reg = LOOSELY_PS_18BIT_RGB666; + tmp_reg = LOOSELY_PS_24BIT_RGB666; dsi_tmp_buf_bpp = 3; break; case MIPI_DSI_FMT_RGB666_PACKED: -- GitLab From ce106d8ef0ba5f7e6fb21a11e0b1d23f145a0e9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Wedekind?= Date: Mon, 19 Feb 2024 14:28:11 +0100 Subject: [PATCH 0975/1418] PCI: Mark 3ware-9650SE Root Port Extended Tags as broken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit baf67aefbe7d7deafa59ca49612d163f8889934c ] Per PCIe r6.1, sec 2.2.6.2 and 7.5.3.4, a Requester may not use 8-bit Tags unless its Extended Tag Field Enable is set, but all Receivers/Completers must handle 8-bit Tags correctly regardless of their Extended Tag Field Enable. Some devices do not handle 8-bit Tags as Completers, so add a quirk for them. If we find such a device, we disable Extended Tags for the entire hierarchy to make peer-to-peer DMA possible. The 3ware 9650SE seems to have issues with handling 8-bit tags. Mark it as broken. This fixes PCI Parity Errors like : 3w-9xxx: scsi0: ERROR: (0x06:0x000C): PCI Parity Error: clearing. 3w-9xxx: scsi0: ERROR: (0x06:0x000D): PCI Abort: clearing. 3w-9xxx: scsi0: ERROR: (0x06:0x000E): Controller Queue Error: clearing. 3w-9xxx: scsi0: ERROR: (0x06:0x0010): Microcontroller Error: clearing. Link: https://lore.kernel.org/r/20240219132811.8351-1-joerg@wedekind.de Fixes: 60db3a4d8cc9 ("PCI: Enable PCIe Extended Tags if supported") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=202425 Signed-off-by: Jörg Wedekind Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 51d634fbdfb8e..c175b70a984c6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5415,6 +5415,7 @@ static void quirk_no_ext_tags(struct pci_dev *pdev) pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL); } +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_3WARE, 0x1004, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0132, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0141, quirk_no_ext_tags); -- GitLab From d183ee71366c7de1cb999b7573c1d6da2dc483cb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 10 Jan 2024 19:58:21 +0100 Subject: [PATCH 0976/1418] clk: hisilicon: hi3519: Release the correct number of gates in hi3519_clk_unregister() [ Upstream commit 74e39f526d95c0c119ada1874871ee328c59fbee ] The gates are stored in 'hi3519_gate_clks', not 'hi3519_mux_clks'. This is also in line with how hisi_clk_register_gate() is called in the probe. Fixes: 224b3b262c52 ("clk: hisilicon: hi3519: add driver remove path and fix some issues") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/c3f1877c9a0886fa35c949c8f0ef25547f284f18.1704912510.git.christophe.jaillet@wanadoo.fr Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/hisilicon/clk-hi3519.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/hisilicon/clk-hi3519.c b/drivers/clk/hisilicon/clk-hi3519.c index ad0c7f350cf03..60d8a27a90824 100644 --- a/drivers/clk/hisilicon/clk-hi3519.c +++ b/drivers/clk/hisilicon/clk-hi3519.c @@ -130,7 +130,7 @@ static void hi3519_clk_unregister(struct platform_device *pdev) of_clk_del_provider(pdev->dev.of_node); hisi_clk_unregister_gate(hi3519_gate_clks, - ARRAY_SIZE(hi3519_mux_clks), + ARRAY_SIZE(hi3519_gate_clks), crg->clk_data); hisi_clk_unregister_mux(hi3519_mux_clks, ARRAY_SIZE(hi3519_mux_clks), -- GitLab From e0b0d1c46a2ce1e46b79d004a7270fdef872e097 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 21 Jan 2024 16:16:24 +0100 Subject: [PATCH 0977/1418] clk: hisilicon: hi3559a: Fix an erroneous devm_kfree() [ Upstream commit 64c6a38136b74a2f18c42199830975edd9fbc379 ] 'p_clk' is an array allocated just before the for loop for all clk that need to be registered. It is incremented at each loop iteration. If a clk_register() call fails, 'p_clk' may point to something different from what should be freed. The best we can do, is to avoid this wrong release of memory. Fixes: 6c81966107dc ("clk: hisilicon: Add clock driver for hi3559A SoC") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/773fc8425c3b8f5b0ca7c1d89f15b65831a85ca9.1705850155.git.christophe.jaillet@wanadoo.fr Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/hisilicon/clk-hi3559a.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c index 9ea1a80acbe8b..0272276550ff1 100644 --- a/drivers/clk/hisilicon/clk-hi3559a.c +++ b/drivers/clk/hisilicon/clk-hi3559a.c @@ -491,7 +491,6 @@ static void hisi_clk_register_pll(struct hi3559av100_pll_clock *clks, clk = clk_register(NULL, &p_clk->hw); if (IS_ERR(clk)) { - devm_kfree(dev, p_clk); dev_err(dev, "%s: failed to register clock %s\n", __func__, clks[i].name); continue; -- GitLab From 527480cab5ccd85beb355f7ac4a27903eabc7348 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 15 Dec 2023 12:33:55 +0300 Subject: [PATCH 0978/1418] drm/tegra: put drm_gem_object ref on error in tegra_fb_create [ Upstream commit 32e5a120a5105bce01561978ee55aee8e40ac0dc ] Inside tegra_fb_create(), drm_gem_object_lookup() increments ref count of the found object. But if the following size check fails then the last found object's ref count should be put there as the unreferencing loop can't detect this situation. Found by Linux Verification Center (linuxtesting.org). Fixes: de2ba664c30f ("gpu: host1x: drm: Add memory manager and fb") Signed-off-by: Fedor Pchelkin Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20231215093356.12067-1-pchelkin@ispras.ru Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 9291209154a7a..a688ecf08451e 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -166,6 +166,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, if (gem->size < size) { err = -EINVAL; + drm_gem_object_put(gem); goto unreference; } -- GitLab From 3eb47e41c2057fb6db5f73a60f35705450d80c08 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 20 Feb 2024 11:50:10 +0000 Subject: [PATCH 0979/1418] mfd: syscon: Call of_node_put() only when of_parse_phandle() takes a ref [ Upstream commit d2b0680cf3b05490b579e71b0df6e07451977745 ] of_parse_phandle() returns a device_node with refcount incremented, which the callee needs to call of_node_put() on when done. We should only call of_node_put() when the property argument is provided though as otherwise nothing has taken a reference on the node. Fixes: 45330bb43421 ("mfd: syscon: Allow property as NULL in syscon_regmap_lookup_by_phandle") Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20240220115012.471689-2-peter.griffin@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/syscon.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 6196724ef39bb..ecfe151220919 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -223,7 +223,9 @@ struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, return ERR_PTR(-ENODEV); regmap = syscon_node_to_regmap(syscon_np); - of_node_put(syscon_np); + + if (property) + of_node_put(syscon_np); return regmap; } -- GitLab From 6be122c8d2151c5db0866fd64cbb893ca45bb5ab Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 20 Feb 2024 11:50:12 +0000 Subject: [PATCH 0980/1418] mfd: altera-sysmgr: Call of_node_put() only when of_parse_phandle() takes a ref [ Upstream commit e28c28a34ee9fa2ea671a20e5e7064e6220d55e7 ] of_parse_phandle() returns a device_node with refcount incremented, which the callee needs to call of_node_put() on when done. We should only call of_node_put() when the property argument is provided though as otherwise nothing has taken a reference on the node. Fixes: f36e789a1f8d ("mfd: altera-sysmgr: Add SOCFPGA System Manager") Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20240220115012.471689-4-peter.griffin@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/altera-sysmgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 5d3715a28b28e..dbe1009943718 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -110,7 +110,9 @@ struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np, dev = driver_find_device_by_of_node(&altr_sysmgr_driver.driver, (void *)sysmgr_np); - of_node_put(sysmgr_np); + if (property) + of_node_put(sysmgr_np); + if (!dev) return ERR_PTR(-EPROBE_DEFER); -- GitLab From 6a2ef97250681a5b104396bcc1e4a67f1b25ec28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 14:49:46 +0100 Subject: [PATCH 0981/1418] crypto: arm/sha - fix function cast warnings [ Upstream commit 53cc9baeb9bc2a187eb9c9790d30995148852b12 ] clang-16 warns about casting between incompatible function types: arch/arm/crypto/sha256_glue.c:37:5: error: cast from 'void (*)(u32 *, const void *, unsigned int)' (aka 'void (*)(unsigned int *, const void *, unsigned int)') to 'sha256_block_fn *' (aka 'void (*)(struct sha256_state *, const unsigned char *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 37 | (sha256_block_fn *)sha256_block_data_order); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm/crypto/sha512-glue.c:34:3: error: cast from 'void (*)(u64 *, const u8 *, int)' (aka 'void (*)(unsigned long long *, const unsigned char *, int)') to 'sha512_block_fn *' (aka 'void (*)(struct sha512_state *, const unsigned char *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 34 | (sha512_block_fn *)sha512_block_data_order); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix the prototypes for the assembler functions to match the typedef. The code already relies on the digest being the first part of the state structure, so there is no change in behavior. Fixes: c80ae7ca3726 ("crypto: arm/sha512 - accelerated SHA-512 using ARM generic ASM and NEON") Fixes: b59e2ae3690c ("crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer") Signed-off-by: Arnd Bergmann Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/arm/crypto/sha256_glue.c | 13 +++++-------- arch/arm/crypto/sha512-glue.c | 12 +++++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index 433ee4ddce6c8..f85933fdec75f 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -24,8 +24,8 @@ #include "sha256_glue.h" -asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); +asmlinkage void sha256_block_data_order(struct sha256_state *state, + const u8 *data, int num_blks); int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -33,23 +33,20 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, /* make sure casting to sha256_block_fn() is safe */ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); - return sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + return sha256_base_do_update(desc, data, len, sha256_block_data_order); } EXPORT_SYMBOL(crypto_sha256_arm_update); static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out) { - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_finalize(desc, sha256_block_data_order); return sha256_base_finish(desc, out); } int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_update(desc, data, len, sha256_block_data_order); return crypto_sha256_arm_final(desc, out); } EXPORT_SYMBOL(crypto_sha256_arm_finup); diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 0635a65aa488b..1be5bd498af36 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -25,27 +25,25 @@ MODULE_ALIAS_CRYPTO("sha512"); MODULE_ALIAS_CRYPTO("sha384-arm"); MODULE_ALIAS_CRYPTO("sha512-arm"); -asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks); +asmlinkage void sha512_block_data_order(struct sha512_state *state, + u8 const *src, int blocks); int sha512_arm_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + return sha512_base_do_update(desc, data, len, sha512_block_data_order); } static int sha512_arm_final(struct shash_desc *desc, u8 *out) { - sha512_base_do_finalize(desc, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_finalize(desc, sha512_block_data_order); return sha512_base_finish(desc, out); } int sha512_arm_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_update(desc, data, len, sha512_block_data_order); return sha512_arm_final(desc, out); } -- GitLab From 67355a760bf52c8623b8242d845cf20db4517eb4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 17 Feb 2024 08:55:13 -0800 Subject: [PATCH 0982/1418] crypto: jitter - fix CRYPTO_JITTERENTROPY help text [ Upstream commit e63df1ec9a16dd9e13e9068243e64876de06f795 ] Correct various small problems in the help text: a. change 2 spaces to ", " b. finish an incomplete sentence c. change non-working URL to working URL Fixes: a9a98d49da52 ("crypto: Kconfig - simplify compression/RNG entries") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218458 Signed-off-by: Randy Dunlap Cc: Bagas Sanjaya Cc: Robert Elliott Cc: Christoph Biedl Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Acked-by: Bagas Sanjaya Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index d779667671b23..edf193aff23e7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1285,10 +1285,11 @@ config CRYPTO_JITTERENTROPY A non-physical non-deterministic ("true") RNG (e.g., an entropy source compliant with NIST SP800-90B) intended to provide a seed to a - deterministic RNG (e.g. per NIST SP800-90C). + deterministic RNG (e.g., per NIST SP800-90C). This RNG does not perform any cryptographic whitening of the generated + random numbers. - See https://www.chronox.de/jent.html + See https://www.chronox.de/jent/ config CRYPTO_KDF800108_CTR tristate -- GitLab From c82d434069733ee19f8fa7271e3f7cc505620219 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 13 Feb 2024 10:16:36 +0200 Subject: [PATCH 0983/1418] drm/tidss: Fix initial plane zpos values [ Upstream commit 3ec948ccb2c4b99e8fbfdd950adbe92ea577b395 ] When the driver sets up the zpos property it sets the default zpos value to the HW id of the plane. That is fine as such, but as on many DSS versions the driver arranges the DRM planes in a different order than the HW planes (to keep the non-scalable planes first), this leads to odd initial zpos values. An example is J721e, where the initial zpos values for DRM planes are 1, 3, 0, 2. In theory the userspace should configure the zpos values properly when using multiple planes, and in that sense the initial zpos values shouldn't matter, but there's really no reason not to fix this and help the userspace apps which don't handle zpos perfectly. In particular, some versions of Weston seem to have issues dealing with the planes with the current default zpos values. So let's change the zpos values for the DRM planes to 0, 1, 2, 3. Another option would be to configure the planes marked as primary planes to zpos 0. On a two display system this would give us plane zpos values of 0, 0, 1, 2. The end result and behavior would be very similar in this option, and I'm not aware that this would actually help us in any way. So, to keep the code simple, I opted for the 0, 1, 2, 3 values. Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem") Reviewed-by: Aradhya Bhatia Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20240213-tidss-fixes-v1-1-d709e8dfa505@ideasonboard.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tidss/tidss_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index 42d50ec5526d7..435b3b66ae632 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -211,7 +211,7 @@ struct tidss_plane *tidss_plane_create(struct tidss_device *tidss, drm_plane_helper_add(&tplane->plane, &tidss_plane_helper_funcs); - drm_plane_create_zpos_property(&tplane->plane, hw_plane_id, 0, + drm_plane_create_zpos_property(&tplane->plane, tidss->num_planes, 0, num_planes - 1); ret = drm_plane_create_color_properties(&tplane->plane, -- GitLab From 82120c9ab4b8edc5d53bc63a693e15c4d1bac5e1 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 13 Feb 2024 10:16:37 +0200 Subject: [PATCH 0984/1418] drm/tidss: Fix sync-lost issue with two displays [ Upstream commit c079e2e113f2ec2803ba859bbb442a6ab82c96bd ] A sync lost issue can be observed with two displays, when moving a plane from one disabled display to an another disabled display, and then enabling the display to which the plane was moved to. The exact requirements for this to trigger are not clear. It looks like the issue is that the layers are left enabled in the first display's OVR registers. Even if the corresponding VP is disabled, it still causes an issue, as if the disabled VP and its OVR would still be in use, leading to the same VID being used by two OVRs. However, this is just speculation based on testing the DSS behavior. Experimentation shows that as a workaround, we can disable all the layers in the OVR when disabling a VP. There should be no downside to this, as the OVR is anyway effectively disabled if its VP is disabled, and it seems to solve the sync lost issue. However, there may be a bigger issue in play here, related to J721e erratum i2097 ("DSS: Disabling a Layer Connected to Overlay May Result in Synclost During the Next Frame"). Experimentation also shows that the OVR's CHANNELIN field has similar issue. So we may need to revisit this when we find out more about the core issue. Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem") Reviewed-by: Aradhya Bhatia Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20240213-tidss-fixes-v1-2-d709e8dfa505@ideasonboard.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tidss/tidss_crtc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index cb66a425dd200..896a77853ebc5 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -270,6 +270,16 @@ static void tidss_crtc_atomic_disable(struct drm_crtc *crtc, reinit_completion(&tcrtc->framedone_completion); + /* + * If a layer is left enabled when the videoport is disabled, and the + * vid pipeline that was used for the layer is taken into use on + * another videoport, the DSS will report sync lost issues. Disable all + * the layers here as a work-around. + */ + for (u32 layer = 0; layer < tidss->feat->num_planes; layer++) + dispc_ovr_enable_layer(tidss->dispc, tcrtc->hw_videoport, layer, + false); + dispc_vp_disable(tidss->dispc, tcrtc->hw_videoport); if (!wait_for_completion_timeout(&tcrtc->framedone_completion, -- GitLab From 438adcd017dc67782c8493ac3436b506798f2a6b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 8 Feb 2024 12:34:18 +0200 Subject: [PATCH 0985/1418] mtd: maps: physmap-core: fix flash size larger than 32-bit [ Upstream commit 3884f03edd34887514a0865a80769cd5362d5c3b ] mtd-ram can potentially be larger than 4GB. get_bitmask_order() uses fls() that is not guaranteed to work with values larger than 32-bit. Specifically on aarch64 fls() returns 0 when all 32 LSB bits are clear. Use fls64() instead. Fixes: ba32ce95cbd987 ("mtd: maps: Merge gpio-addr-flash.c into physmap-core.c") Signed-off-by: Baruch Siach Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/9fbf3664ce00f8b07867f1011834015f21d162a5.1707388458.git.baruch@tkos.co.il Signed-off-by: Sasha Levin --- drivers/mtd/maps/physmap-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 19dad5a23f944..8cdb3512107d3 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -524,7 +524,7 @@ static int physmap_flash_probe(struct platform_device *dev) if (!info->maps[i].phys) info->maps[i].phys = res->start; - info->win_order = get_bitmask_order(resource_size(res)) - 1; + info->win_order = fls64(resource_size(res)) - 1; info->maps[i].size = BIT(info->win_order + (info->gpios ? info->gpios->ndescs : 0)); -- GitLab From 5ac2ca7a50ed4b8df6dad6c345724862c1ec0f6e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:00:09 +0100 Subject: [PATCH 0986/1418] mtd: rawnand: lpc32xx_mlc: fix irq handler prototype [ Upstream commit 347b828882e6334690e7003ce5e2fe5f233dc508 ] clang-16 warns about mismatched function prototypes: drivers/mtd/nand/raw/lpc32xx_mlc.c:783:29: error: cast from 'irqreturn_t (*)(int, struct lpc32xx_nand_host *)' (aka 'enum irqreturn (*)(int, struct lpc32xx_nand_host *)') to 'irq_handler_t' (aka 'enum irqreturn (*)(int, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] Change the interrupt handler to the normal way of just passing a void* pointer and converting it inside the function.. Fixes: 70f7cb78ec53 ("mtd: add LPC32xx MLC NAND driver") Signed-off-by: Arnd Bergmann Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240213100146.455811-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/lpc32xx_mlc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c index 452ecaf7775ac..1cfe3dd0bad4d 100644 --- a/drivers/mtd/nand/raw/lpc32xx_mlc.c +++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c @@ -303,8 +303,9 @@ static int lpc32xx_nand_device_ready(struct nand_chip *nand_chip) return 0; } -static irqreturn_t lpc3xxx_nand_irq(int irq, struct lpc32xx_nand_host *host) +static irqreturn_t lpc3xxx_nand_irq(int irq, void *data) { + struct lpc32xx_nand_host *host = data; uint8_t sr; /* Clear interrupt flag by reading status */ @@ -779,7 +780,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) goto release_dma_chan; } - if (request_irq(host->irq, (irq_handler_t)&lpc3xxx_nand_irq, + if (request_irq(host->irq, &lpc3xxx_nand_irq, IRQF_TRIGGER_HIGH, DRV_NAME, host)) { dev_err(&pdev->dev, "Error requesting NAND IRQ\n"); res = -ENXIO; -- GitLab From 83ee64af9bedccd3a18b90f6c2f637640a799c1a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 23 Feb 2024 18:51:07 +0100 Subject: [PATCH 0987/1418] ASoC: meson: axg-tdm-interface: fix mclk setup without mclk-fs [ Upstream commit e3741a8d28a1137f8b19ae6f3d6e3be69a454a0a ] By default, when mclk-fs is not provided, the tdm-interface driver requests an MCLK that is 4x the bit clock, SCLK. However there is no justification for this: * If the codec needs MCLK for its operation, mclk-fs is expected to be set according to the codec requirements. * If the codec does not need MCLK the minimum is 2 * SCLK, because this is minimum the divider between SCLK and MCLK can do. Multiplying by 4 may cause problems because the PLL limit may be reached sooner than it should, so use 2x instead. Fixes: d60e4f1e4be5 ("ASoC: meson: add tdm interface driver") Signed-off-by: Jerome Brunet Link: https://msgid.link/r/20240223175116.2005407-2-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-tdm-interface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index c040c83637e02..eb188ee950557 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -266,8 +266,8 @@ static int axg_tdm_iface_set_sclk(struct snd_soc_dai *dai, srate = iface->slots * iface->slot_width * params_rate(params); if (!iface->mclk_rate) { - /* If no specific mclk is requested, default to bit clock * 4 */ - clk_set_rate(iface->mclk, 4 * srate); + /* If no specific mclk is requested, default to bit clock * 2 */ + clk_set_rate(iface->mclk, 2 * srate); } else { /* Check if we can actually get the bit clock from mclk */ if (iface->mclk_rate % srate) { -- GitLab From ff4dd05a9415560128c723abff06a77ba74d92c1 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 23 Feb 2024 18:51:08 +0100 Subject: [PATCH 0988/1418] ASoC: meson: axg-tdm-interface: add frame rate constraint [ Upstream commit 59c6a3a43b221cc2a211181b1298e43b2c2df782 ] According to Amlogic datasheets for the SoCs supported by this driver, the maximum bit clock rate is 100MHz. The tdm interface allows the rates listed by the DAI driver, regardless of the number slots or their width. However, these will impact the bit clock rate. Hitting the 100MHz limit is very unlikely for most use cases but it is possible. For example with 32 slots / 32 bits wide, the maximum rate is no longer 384kHz but ~96kHz. Add the constraint accordingly if the component is not already active. If it is active, the rate is already constrained by the first stream rate. Fixes: d60e4f1e4be5 ("ASoC: meson: add tdm interface driver") Signed-off-by: Jerome Brunet Link: https://msgid.link/r/20240223175116.2005407-3-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-tdm-interface.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index eb188ee950557..028383f949efd 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -12,6 +12,9 @@ #include "axg-tdm.h" +/* Maximum bit clock frequency according the datasheets */ +#define MAX_SCLK 100000000 /* Hz */ + enum { TDM_IFACE_PAD, TDM_IFACE_LOOPBACK, @@ -155,19 +158,27 @@ static int axg_tdm_iface_startup(struct snd_pcm_substream *substream, return -EINVAL; } - /* Apply component wide rate symmetry */ if (snd_soc_component_active(dai->component)) { + /* Apply component wide rate symmetry */ ret = snd_pcm_hw_constraint_single(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, iface->rate); - if (ret < 0) { - dev_err(dai->dev, - "can't set iface rate constraint\n"); - return ret; - } + + } else { + /* Limit rate according to the slot number and width */ + unsigned int max_rate = + MAX_SCLK / (iface->slots * iface->slot_width); + ret = snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + 0, max_rate); } - return 0; + if (ret < 0) + dev_err(dai->dev, "can't set iface rate constraint\n"); + else + ret = 0; + + return ret; } static int axg_tdm_iface_set_stream(struct snd_pcm_substream *substream, -- GitLab From c6a05e45de82e3481f8f8ac4d3e655706fcae74a Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 14 Feb 2024 20:11:41 +0530 Subject: [PATCH 0989/1418] HID: amd_sfh: Update HPD sensor structure elements [ Upstream commit bbf0dec30696638b8bdc28cb2f5bf23f8d760b52 ] HPD sensor data is not populating properly because of wrong order of HPD sensor structure elements. So update the order of structure elements to match the HPD sensor data received from the firmware. Fixes: 24a31ea94922 ("HID: amd_sfh: Add initial support for HPD sensor") Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index dfb7cabd82efe..2b125cd9742cb 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -89,10 +89,10 @@ enum mem_use_type { struct hpd_status { union { struct { - u32 human_presence_report : 4; - u32 human_presence_actual : 4; - u32 probablity : 8; u32 object_distance : 16; + u32 probablity : 8; + u32 human_presence_actual : 4; + u32 human_presence_report : 4; } shpd; u32 val; }; -- GitLab From fcb7e704827e14510dc365f19782579fbafcaad2 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 14 Feb 2024 20:11:42 +0530 Subject: [PATCH 0990/1418] HID: amd_sfh: Avoid disabling the interrupt [ Upstream commit c1db0073212ef39d5a46c2aea5e49bf884375ce4 ] HP ProBook x360 435 G7 using older version of firmware which doesn't support disabling the interrupt for all commands. Hence avoid disabling the interrupt for that particular model. Link: https://bugzilla.kernel.org/show_bug.cgi?id=218104 Fixes: b300667b33b2 ("HID: amd_sfh: Disable the interrupt for all command") Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 30 +++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index c936d6a51c0cd..9c963ad27f9d1 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -34,6 +34,8 @@ static int sensor_mask_override = -1; module_param_named(sensor_mask, sensor_mask_override, int, 0444); MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask"); +static bool intr_disable = true; + static int amd_sfh_wait_response_v2(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts) { union cmd_response cmd_resp; @@ -54,7 +56,7 @@ static void amd_start_sensor_v2(struct amd_mp2_dev *privdata, struct amd_mp2_sen cmd_base.ul = 0; cmd_base.cmd_v2.cmd_id = ENABLE_SENSOR; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = info.period; cmd_base.cmd_v2.sensor_id = info.sensor_idx; cmd_base.cmd_v2.length = 16; @@ -72,7 +74,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx) cmd_base.ul = 0; cmd_base.cmd_v2.cmd_id = DISABLE_SENSOR; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = 0; cmd_base.cmd_v2.sensor_id = sensor_idx; cmd_base.cmd_v2.length = 16; @@ -86,7 +88,7 @@ static void amd_stop_all_sensor_v2(struct amd_mp2_dev *privdata) union sfh_cmd_base cmd_base; cmd_base.cmd_v2.cmd_id = STOP_ALL_SENSORS; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = 0; cmd_base.cmd_v2.sensor_id = 0; @@ -288,6 +290,26 @@ int amd_sfh_irq_init(struct amd_mp2_dev *privdata) return 0; } +static int mp2_disable_intr(const struct dmi_system_id *id) +{ + intr_disable = false; + return 0; +} + +static const struct dmi_system_id dmi_sfh_table[] = { + { + /* + * https://bugzilla.kernel.org/show_bug.cgi?id=218104 + */ + .callback = mp2_disable_intr, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook x360 435 G7"), + }, + }, + {} +}; + static const struct dmi_system_id dmi_nodevs[] = { { /* @@ -311,6 +333,8 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i if (dmi_first_match(dmi_nodevs)) return -ENODEV; + dmi_check_system(dmi_sfh_table); + privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL); if (!privdata) return -ENOMEM; -- GitLab From 0a70199742c25694862b9ac09d0cb9d5e33de6fc Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 24 Feb 2024 07:48:52 +0530 Subject: [PATCH 0991/1418] drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7cf1ad2fe10634238b38442a851d89514cb14ea2 ] Missing break statement in the ATOM_ARG_IMM case of a switch statement, adds the missing break statement, ensuring that the program's control flow is as intended. Fixes the below: drivers/gpu/drm/amd/amdgpu/atom.c:323 atom_get_src_int() warn: ignoring unreachable code. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Cc: Jammy Zhou Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 1c5d9388ad0bb..cb6eb47aab65b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -313,7 +313,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, DEBUG("IMM 0x%02X\n", val); return val; } - return 0; + break; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; -- GitLab From 3a1ec89708d2e57e2712f46241282961b1a7a475 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 16 Feb 2024 15:30:47 +0800 Subject: [PATCH 0992/1418] media: pvrusb2: fix uaf in pvr2_context_set_notify [ Upstream commit 0a0b79ea55de8514e1750884e5fec77f9fdd01ee ] [Syzbot reported] BUG: KASAN: slab-use-after-free in pvr2_context_set_notify+0x2c4/0x310 drivers/media/usb/pvrusb2/pvrusb2-context.c:35 Read of size 4 at addr ffff888113aeb0d8 by task kworker/1:1/26 CPU: 1 PID: 26 Comm: kworker/1:1 Not tainted 6.8.0-rc1-syzkaller-00046-gf1a27f081c1f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc4/0x620 mm/kasan/report.c:488 kasan_report+0xda/0x110 mm/kasan/report.c:601 pvr2_context_set_notify+0x2c4/0x310 drivers/media/usb/pvrusb2/pvrusb2-context.c:35 pvr2_context_notify drivers/media/usb/pvrusb2/pvrusb2-context.c:95 [inline] pvr2_context_disconnect+0x94/0xb0 drivers/media/usb/pvrusb2/pvrusb2-context.c:272 Freed by task 906: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3f/0x60 mm/kasan/generic.c:640 poison_slab_object mm/kasan/common.c:241 [inline] __kasan_slab_free+0x106/0x1b0 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kfree+0x105/0x340 mm/slub.c:4409 pvr2_context_check drivers/media/usb/pvrusb2/pvrusb2-context.c:137 [inline] pvr2_context_thread_func+0x69d/0x960 drivers/media/usb/pvrusb2/pvrusb2-context.c:158 [Analyze] Task A set disconnect_flag = !0, which resulted in Task B's condition being met and releasing mp, leading to this issue. [Fix] Place the disconnect_flag assignment operation after all code in pvr2_context_disconnect() to avoid this issue. Reported-and-tested-by: syzbot+ce750e124675d4599449@syzkaller.appspotmail.com Fixes: e5be15c63804 ("V4L/DVB (7711): pvrusb2: Fix race on module unload") Signed-off-by: Edward Adam Davis Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-context.c b/drivers/media/usb/pvrusb2/pvrusb2-context.c index 58f2f3ff10ee2..73c95ba2328a4 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-context.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-context.c @@ -267,9 +267,9 @@ static void pvr2_context_exit(struct pvr2_context *mp) void pvr2_context_disconnect(struct pvr2_context *mp) { pvr2_hdw_disconnect(mp->hdw); - mp->disconnect_flag = !0; if (!pvr2_context_shutok()) pvr2_context_notify(mp); + mp->disconnect_flag = !0; } -- GitLab From 107052a8cfeff3a97326277192b4f052e4860a8a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2024 17:31:44 +0100 Subject: [PATCH 0993/1418] media: dvb-frontends: avoid stack overflow warnings with clang [ Upstream commit 7a4cf27d1f0538f779bf31b8c99eda394e277119 ] A previous patch worked around a KASAN issue in stv0367, now a similar problem showed up with clang: drivers/media/dvb-frontends/stv0367.c:1222:12: error: stack frame size (3624) exceeds limit (2048) in 'stv0367ter_set_frontend' [-Werror,-Wframe-larger-than] 1214 | static int stv0367ter_set_frontend(struct dvb_frontend *fe) Rework the stv0367_writereg() function to be simpler and mark both register access functions as noinline_for_stack so the temporary i2c_msg structures do not get duplicated on the stack when KASAN_STACK is enabled. Fixes: 3cd890dbe2a4 ("media: dvb-frontends: fix i2c access helpers for KASAN") Signed-off-by: Arnd Bergmann Reviewed-by: Justin Stitt Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/stv0367.c | 34 +++++++-------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index 04556b77c16c9..0977564a4a1a4 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -118,50 +118,32 @@ static const s32 stv0367cab_RF_LookUp2[RF_LOOKUP_TABLE2_SIZE][RF_LOOKUP_TABLE2_S } }; -static -int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) +static noinline_for_stack +int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - u8 buf[MAX_XFER_SIZE]; + u8 buf[3] = { MSB(reg), LSB(reg), data }; struct i2c_msg msg = { .addr = state->config->demod_address, .flags = 0, .buf = buf, - .len = len + 2 + .len = 3, }; int ret; - if (2 + len > sizeof(buf)) { - printk(KERN_WARNING - "%s: i2c wr reg=%04x: len=%d is too big!\n", - KBUILD_MODNAME, reg, len); - return -EINVAL; - } - - - buf[0] = MSB(reg); - buf[1] = LSB(reg); - memcpy(buf + 2, data, len); - if (i2cdebug) printk(KERN_DEBUG "%s: [%02x] %02x: %02x\n", __func__, - state->config->demod_address, reg, buf[2]); + state->config->demod_address, reg, data); ret = i2c_transfer(state->i2c, &msg, 1); if (ret != 1) printk(KERN_ERR "%s: i2c write error! ([%02x] %02x: %02x)\n", - __func__, state->config->demod_address, reg, buf[2]); + __func__, state->config->demod_address, reg, data); return (ret != 1) ? -EREMOTEIO : 0; } -static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) -{ - u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ - - return stv0367_writeregs(state, reg, &tmp, 1); -} - -static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) +static noinline_for_stack +u8 stv0367_readreg(struct stv0367_state *state, u16 reg) { u8 b0[] = { 0, 0 }; u8 b1[] = { 0 }; -- GitLab From e04d15c8bb3e111dd69f98894acd92d63e87aac3 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 21 Feb 2024 12:37:13 +0800 Subject: [PATCH 0994/1418] media: go7007: fix a memleak in go7007_load_encoder [ Upstream commit b9b683844b01d171a72b9c0419a2d760d946ee12 ] In go7007_load_encoder, bounce(i.e. go->boot_fw), is allocated without a deallocation thereafter. After the following call chain: saa7134_go7007_init |-> go7007_boot_encoder |-> go7007_load_encoder |-> kfree(go) go is freed and thus bounce is leaked. Fixes: 95ef39403f89 ("[media] go7007: remember boot firmware") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/go7007/go7007-driver.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/go7007/go7007-driver.c b/drivers/media/usb/go7007/go7007-driver.c index 0c24e29843048..eb03f98b2ef11 100644 --- a/drivers/media/usb/go7007/go7007-driver.c +++ b/drivers/media/usb/go7007/go7007-driver.c @@ -80,7 +80,7 @@ static int go7007_load_encoder(struct go7007 *go) const struct firmware *fw_entry; char fw_name[] = "go7007/go7007fw.bin"; void *bounce; - int fw_len, rv = 0; + int fw_len; u16 intr_val, intr_data; if (go->boot_fw == NULL) { @@ -109,9 +109,11 @@ static int go7007_load_encoder(struct go7007 *go) go7007_read_interrupt(go, &intr_val, &intr_data) < 0 || (intr_val & ~0x1) != 0x5a5a) { v4l2_err(go, "error transferring firmware\n"); - rv = -1; + kfree(go->boot_fw); + go->boot_fw = NULL; + return -1; } - return rv; + return 0; } MODULE_FIRMWARE("go7007/go7007fw.bin"); -- GitLab From 55ca0c7eae8499bb96f4e5d9b26af95e89c4e6a0 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 21 Feb 2024 13:17:04 +0800 Subject: [PATCH 0995/1418] media: ttpci: fix two memleaks in budget_av_attach [ Upstream commit d0b07f712bf61e1a3cf23c87c663791c42e50837 ] When saa7146_register_device and saa7146_vv_init fails, budget_av_attach should free the resources it allocates, like the error-handling of ttpci_budget_init does. Besides, there are two fixme comment refers to such deallocations. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../staging/media/deprecated/saa7146/ttpci/budget-av.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c b/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c index 0c61a2dec2211..81fc4835679f3 100644 --- a/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c +++ b/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c @@ -1462,7 +1462,8 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio budget_av->has_saa7113 = 1; err = saa7146_vv_init(dev, &vv_data); if (err != 0) { - /* fixme: proper cleanup here */ + ttpci_budget_deinit(&budget_av->budget); + kfree(budget_av); ERR("cannot init vv subsystem\n"); return err; } @@ -1471,9 +1472,10 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio vv_data.vid_ops.vidioc_s_input = vidioc_s_input; if ((err = saa7146_register_device(&budget_av->vd, dev, "knc1", VFL_TYPE_VIDEO))) { - /* fixme: proper cleanup here */ - ERR("cannot register capture v4l2 device\n"); saa7146_vv_release(dev); + ttpci_budget_deinit(&budget_av->budget); + kfree(budget_av); + ERR("cannot register capture v4l2 device\n"); return err; } -- GitLab From 32bfbab72aaa16aa08cb058d96a4d9afd3d5037f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 24 Feb 2024 13:10:22 +0100 Subject: [PATCH 0996/1418] media: mediatek: vcodec: avoid -Wcast-function-type-strict warning [ Upstream commit bfb1b99802ef16045402deb855c197591dc78886 ] The ipi handler here tries hard to maintain const-ness of its argument, but by doing that causes a warning about function type casts: drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c:38:32: error: cast from 'mtk_vcodec_ipi_handler' (aka 'void (*)(void *, unsigned int, void *)') to 'ipi_handler_t' (aka 'void (*)(const void *, unsigned int, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 38 | ipi_handler_t handler_const = (ipi_handler_t)handler; | ^~~~~~~~~~~~~~~~~~~~~~ Remove the hack and just use a non-const argument. Fixes: bf1d556ad4e0 ("media: mtk-vcodec: abstract firmware interface") Signed-off-by: Arnd Bergmann Reviewed-by: Ricardo Ribalda Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c | 2 +- .../media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c | 10 +--------- drivers/media/platform/mediatek/vpu/mtk_vpu.c | 2 +- drivers/media/platform/mediatek/vpu/mtk_vpu.h | 2 +- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c b/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c index b065ccd069140..378a1cba0144f 100644 --- a/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c +++ b/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c @@ -26,7 +26,7 @@ static void mtk_mdp_vpu_handle_init_ack(const struct mdp_ipi_comm_ack *msg) vpu->inst_addr = msg->vpu_inst_addr; } -static void mtk_mdp_vpu_ipi_handler(const void *data, unsigned int len, +static void mtk_mdp_vpu_ipi_handler(void *data, unsigned int len, void *priv) { const struct mdp_ipi_comm_ack *msg = data; diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c index cfc7ebed8fb7a..1ec29f1b163a1 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c @@ -29,15 +29,7 @@ static int mtk_vcodec_vpu_set_ipi_register(struct mtk_vcodec_fw *fw, int id, mtk_vcodec_ipi_handler handler, const char *name, void *priv) { - /* - * The handler we receive takes a void * as its first argument. We - * cannot change this because it needs to be passed down to the rproc - * subsystem when SCP is used. VPU takes a const argument, which is - * more constrained, so the conversion below is safe. - */ - ipi_handler_t handler_const = (ipi_handler_t)handler; - - return vpu_ipi_register(fw->pdev, id, handler_const, name, priv); + return vpu_ipi_register(fw->pdev, id, handler, name, priv); } static int mtk_vcodec_vpu_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf, diff --git a/drivers/media/platform/mediatek/vpu/mtk_vpu.c b/drivers/media/platform/mediatek/vpu/mtk_vpu.c index 6beab9e86a22a..44adf5cfc9bb2 100644 --- a/drivers/media/platform/mediatek/vpu/mtk_vpu.c +++ b/drivers/media/platform/mediatek/vpu/mtk_vpu.c @@ -635,7 +635,7 @@ OUT_LOAD_FW: } EXPORT_SYMBOL_GPL(vpu_load_firmware); -static void vpu_init_ipi_handler(const void *data, unsigned int len, void *priv) +static void vpu_init_ipi_handler(void *data, unsigned int len, void *priv) { struct mtk_vpu *vpu = priv; const struct vpu_run *run = data; diff --git a/drivers/media/platform/mediatek/vpu/mtk_vpu.h b/drivers/media/platform/mediatek/vpu/mtk_vpu.h index a56053ff135af..da05f3e740810 100644 --- a/drivers/media/platform/mediatek/vpu/mtk_vpu.h +++ b/drivers/media/platform/mediatek/vpu/mtk_vpu.h @@ -17,7 +17,7 @@ * VPU interfaces with other blocks by share memory and interrupt. */ -typedef void (*ipi_handler_t) (const void *data, +typedef void (*ipi_handler_t) (void *data, unsigned int len, void *priv); -- GitLab From e9eee1ce1b6cb44e0bf625533d8a21f58d3780e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 28 Feb 2024 12:28:03 +0100 Subject: [PATCH 0997/1418] gpio: nomadik: fix offset bug in nmk_pmx_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 53cf6b72e074864b94ade97dcb6f30b5ac1a82dc ] Previously, the statement looked like: slpm[x] &= ~BIT(g->grp.pins[i]); Where: - slpm is a unsigned int pointer; - g->grp.pins[i] is a pin number. It can grow to more than 32. The expected shift amount is a pin bank offset. This bug does not occur on every group or pin: the altsetting must be NMK_GPIO_ALT_C and the pin must be 32 or above. It might have occured. For example, in pinctrl-nomadik-db8500.c, pin group i2c3_c_2 has the right altsetting and pins 229 and 230. Fixes: dbfe8ca259e1 ("pinctrl/nomadik: implement pin multiplexing") Reviewed-by: Linus Walleij Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20240228-mbly-gpio-v2-5-3ba757474006@bootlin.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index f7d02513d8cc1..e79037dc85796 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1571,8 +1571,10 @@ static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function, * Then mask the pins that need to be sleeping now when we're * switching to the ALT C function. */ - for (i = 0; i < g->grp.npins; i++) - slpm[g->grp.pins[i] / NMK_GPIO_PER_CHIP] &= ~BIT(g->grp.pins[i]); + for (i = 0; i < g->grp.npins; i++) { + unsigned int bit = g->grp.pins[i] % NMK_GPIO_PER_CHIP; + slpm[g->grp.pins[i] / NMK_GPIO_PER_CHIP] &= ~BIT(bit); + } nmk_gpio_glitch_slpm_init(slpm); } -- GitLab From d2bd30c710475b2e29288827d2c91f9e6e2b91d7 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 23 Feb 2024 13:23:29 -0800 Subject: [PATCH 0998/1418] drm/mediatek: Fix a null pointer crash in mtk_drm_crtc_finish_page_flip [ Upstream commit c958e86e9cc1b48cac004a6e245154dfba8e163b ] It's possible that mtk_crtc->event is NULL in mtk_drm_crtc_finish_page_flip(). pending_needs_vblank value is set by mtk_crtc->event, but in mtk_drm_crtc_atomic_flush(), it's is not guarded by the same lock in mtk_drm_finish_page_flip(), thus a race condition happens. Consider the following case: CPU1 CPU2 step 1: mtk_drm_crtc_atomic_begin() mtk_crtc->event is not null, step 1: mtk_drm_crtc_atomic_flush: mtk_drm_crtc_update_config( !!mtk_crtc->event) step 2: mtk_crtc_ddp_irq -> mtk_drm_finish_page_flip: lock mtk_crtc->event set to null, pending_needs_vblank set to false unlock pending_needs_vblank set to true, step 2: mtk_crtc_ddp_irq -> mtk_drm_finish_page_flip called again, pending_needs_vblank is still true //null pointer Instead of guarding the entire mtk_drm_crtc_atomic_flush(), it's more efficient to just check if mtk_crtc->event is null before use. Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Hsin-Yi Wang Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20240223212404.3709690-1-hsinyi@chromium.org/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 558000db4a100..beaaf44004cfd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -91,11 +91,13 @@ static void mtk_drm_crtc_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) struct drm_crtc *crtc = &mtk_crtc->base; unsigned long flags; - spin_lock_irqsave(&crtc->dev->event_lock, flags); - drm_crtc_send_vblank_event(crtc, mtk_crtc->event); - drm_crtc_vblank_put(crtc); - mtk_crtc->event = NULL; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + if (mtk_crtc->event) { + spin_lock_irqsave(&crtc->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, mtk_crtc->event); + drm_crtc_vblank_put(crtc); + mtk_crtc->event = NULL; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + } } static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) -- GitLab From a3f22feb2220a945d1c3282e34199e8bcdc5afc4 Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Thu, 8 Dec 2022 21:34:49 +0800 Subject: [PATCH 0999/1418] powerpc/pseries: Fix potential memleak in papr_get_attr() [ Upstream commit cda9c0d556283e2d4adaa9960b2dc19b16156bae ] `buf` is allocated in papr_get_attr(), and krealloc() of `buf` could fail. We need to free the original `buf` in the case of failure. Fixes: 3c14b73454cf ("powerpc/pseries: Interface to represent PAPR firmware attributes") Signed-off-by: Qiheng Lin Signed-off-by: Michael Ellerman Link: https://msgid.link/20221208133449.16284-1-linqiheng@huawei.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/papr_platform_attributes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c index 526c621b098be..eea2041b270b5 100644 --- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -101,10 +101,12 @@ retry: esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); - if (temp_buf) + if (temp_buf) { buf = temp_buf; - else - return -ENOMEM; + } else { + ret = -ENOMEM; + goto out_buf; + } goto retry; } -- GitLab From 48feaf23f3a4a4fe020976fa7aedc4a2205e16e4 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 29 Feb 2024 17:58:47 +0530 Subject: [PATCH 1000/1418] powerpc/hv-gpci: Fix the H_GET_PERF_COUNTER_INFO hcall return value checks [ Upstream commit ad86d7ee43b22aa2ed60fb982ae94b285c1be671 ] Running event hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ in one of the system throws below error: ---Logs--- # perf list | grep hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=?/[Kernel PMU event] # perf stat -v -e hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ sleep 2 Using CPUID 00800200 Control descriptor is not initialized Warning: hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ event is not supported by the kernel. failed to read counter hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ Performance counter stats for 'system wide': hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ 2.000700771 seconds time elapsed The above error is because of the hcall failure as required permission "Enable Performance Information Collection" is not set. Based on current code, single_gpci_request function did not check the error type incase hcall fails and by default returns EINVAL. But we can have other reasons for hcall failures like H_AUTHORITY/H_PARAMETER with detail_rc as GEN_BUF_TOO_SMALL, for which we need to act accordingly. Fix this issue by adding new checks in the single_gpci_request and h_gpci_event_init functions. Result after fix patch changes: # perf stat -e hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ sleep 2 Error: No permission to enable hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ event. Fixes: 220a0c609ad1 ("powerpc/perf: Add support for the hv gpci (get performance counter info) interface") Reported-by: Akanksha J N Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229122847.101162-1-kjain@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/hv-gpci.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7ff8ff3509f5f..943248a0e9a9d 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -164,6 +164,20 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + /* + * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', + * specifies that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * Since in this function we are only accessing data for a given starting index, + * we don't need to accommodate whole data and can get required count by + * accessing first entry data. + * Hence hcall fails only incase the ret value is other than H_SUCCESS or + * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). + */ + if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) + ret = 0; + if (ret) { pr_devel("hcall failed: 0x%lx\n", ret); goto out; @@ -228,6 +242,7 @@ static int h_gpci_event_init(struct perf_event *event) { u64 count; u8 length; + unsigned long ret; /* Not our event */ if (event->attr.type != event->pmu->type) @@ -258,13 +273,23 @@ static int h_gpci_event_init(struct perf_event *event) } /* check if the request works... */ - if (single_gpci_request(event_get_request(event), + ret = single_gpci_request(event_get_request(event), event_get_starting_index(event), event_get_secondary_index(event), event_get_counter_info_version(event), event_get_offset(event), length, - &count)) { + &count); + + /* + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. + */ + if (ret == H_AUTHORITY) + return -EPERM; + + if (ret) { pr_devel("gpci hcall failed\n"); return -EINVAL; } -- GitLab From 0189516bb53699c4ba6814a2da8da818e8780cbb Mon Sep 17 00:00:00 2001 From: Paloma Arellano Date: Thu, 22 Feb 2024 11:39:47 -0800 Subject: [PATCH 1001/1418] drm/msm/dpu: add division of drm_display_mode's hskew parameter [ Upstream commit 551ee0f210991d25f336bc27262353bfe99d3eed ] Setting up the timing engine when the physical encoder has a split role neglects dividing the drm_display_mode's hskew parameter. Let's fix this since this must also be done in preparation for implementing YUV420 over DP. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Paloma Arellano Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/579605/ Link: https://lore.kernel.org/r/20240222194025.25329-3-quic_parellan@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 09aeec00bf5e2..2baade1cd4876 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -257,12 +257,14 @@ static void dpu_encoder_phys_vid_setup_timing_engine( mode.htotal >>= 1; mode.hsync_start >>= 1; mode.hsync_end >>= 1; + mode.hskew >>= 1; DPU_DEBUG_VIDENC(phys_enc, - "split_role %d, halve horizontal %d %d %d %d\n", + "split_role %d, halve horizontal %d %d %d %d %d\n", phys_enc->split_role, mode.hdisplay, mode.htotal, - mode.hsync_start, mode.hsync_end); + mode.hsync_start, mode.hsync_end, + mode.hskew); } drm_mode_to_intf_timing_params(phys_enc, &mode, &timing_params); -- GitLab From c550f0055c5ddcbe9a909becb774c37c54c217be Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Feb 2024 10:35:46 +0800 Subject: [PATCH 1002/1418] modules: wait do_free_init correctly [ Upstream commit 8f8cd6c0a43ed637e620bbe45a8d0e0c2f4d5130 ] The synchronization here is to ensure the ordering of freeing of a module init so that it happens before W+X checking. It is worth noting it is not that the freeing was not happening, it is just that our sanity checkers raced against the permission checkers which assume init memory is already gone. Commit 1a7b7d922081 ("modules: Use vmalloc special flag") moved calling do_free_init() into a global workqueue instead of relying on it being called through call_rcu(..., do_free_init), which used to allowed us call do_free_init() asynchronously after the end of a subsequent grace period. The move to a global workqueue broke the gaurantees for code which needed to be sure the do_free_init() would complete with rcu_barrier(). To fix this callers which used to rely on rcu_barrier() must now instead use flush_work(&init_free_wq). Without this fix, we still could encounter false positive reports in W+X checking since the rcu_barrier() here can not ensure the ordering now. Even worse, the rcu_barrier() can introduce significant delay. Eric Chanudet reported that the rcu_barrier introduces ~0.1s delay on a PREEMPT_RT kernel. [ 0.291444] Freeing unused kernel memory: 5568K [ 0.402442] Run /sbin/init as init process With this fix, the above delay can be eliminated. Link: https://lkml.kernel.org/r/20240227023546.2490667-1-changbin.du@huawei.com Fixes: 1a7b7d922081 ("modules: Use vmalloc special flag") Signed-off-by: Changbin Du Tested-by: Eric Chanudet Acked-by: Luis Chamberlain Cc: Xiaoyi Su Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- include/linux/moduleloader.h | 8 ++++++++ init/main.c | 5 +++-- kernel/module/main.c | 9 +++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1322652a9d0d9..7dc186ec52a29 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -95,6 +95,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/init/main.c b/init/main.c index 87a52bdb41d67..ccde19e7275fa 100644 --- a/init/main.c +++ b/init/main.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -1473,11 +1474,11 @@ static void mark_readonly(void) if (rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier(); + flush_module_init_free_work(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/module/main.c b/kernel/module/main.c index 7a376e26de85b..554aba47ab689 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2434,6 +2434,11 @@ static void do_free_init(struct work_struct *w) } } +void flush_module_init_free_work(void) +{ + flush_work(&init_free_wq); +} + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." /* Default value for module->async_probe_requested */ @@ -2524,8 +2529,8 @@ static noinline int do_init_module(struct module *mod) * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to - * be cleaned up needs to sync with the queued work - ie - * rcu_barrier() + * be cleaned up needs to sync with the queued work by invoking + * flush_module_init_free_work(). */ if (llist_add(&freeinit->node, &init_free_list)) schedule_work(&init_free_wq); -- GitLab From c2c32faae82fd08b27b2f8b782acefe949638774 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Mar 2024 23:34:08 +1100 Subject: [PATCH 1003/1418] powerpc/embedded6xx: Fix no previous prototype for avr_uart_send() etc. [ Upstream commit 20933531be0577cdd782216858c26150dbc7936f ] Move the prototypes into mpc10x.h which is included by all the relevant C files, fixes: arch/powerpc/platforms/embedded6xx/ls_uart.c:59:6: error: no previous prototype for 'avr_uart_configure' arch/powerpc/platforms/embedded6xx/ls_uart.c:82:6: error: no previous prototype for 'avr_uart_send' Signed-off-by: Michael Ellerman Link: https://msgid.link/20240305123410.3306253-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/platforms/embedded6xx/linkstation.c | 3 --- arch/powerpc/platforms/embedded6xx/mpc10x.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 1830e1ac1f8f0..107a8b60ad0c9 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -99,9 +99,6 @@ static void __init linkstation_init_IRQ(void) mpic_init(mpic); } -extern void avr_uart_configure(void); -extern void avr_uart_send(const char); - static void __noreturn linkstation_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h index 5ad12023e5628..ebc258fa4858d 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc10x.h +++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h @@ -156,4 +156,7 @@ int mpc10x_disable_store_gathering(struct pci_controller *hose); /* For MPC107 boards that use the built-in openpic */ void mpc10x_set_openpic(void); +void avr_uart_configure(void); +void avr_uart_send(const char c); + #endif /* __PPC_KERNEL_MPC10X_H */ -- GitLab From d1f384e4c2011a90c2025fabe95476c6fdb6888d Mon Sep 17 00:00:00 2001 From: George Stark Date: Thu, 14 Dec 2023 20:36:05 +0300 Subject: [PATCH 1004/1418] leds: aw2013: Unlock mutex before destroying it [ Upstream commit 6969d0a2ba1adc9ba6a49b9805f24080896c255c ] In the probe() callback in case of error mutex is destroyed being locked which is not allowed so unlock the mutex before destroying. Fixes: 59ea3c9faf32 ("leds: add aw2013 driver") Signed-off-by: George Stark Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231214173614.2820929-2-gnstark@salutedevices.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/leds-aw2013.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/leds/leds-aw2013.c b/drivers/leds/leds-aw2013.c index 0b52fc9097c6e..3c05958578a1c 100644 --- a/drivers/leds/leds-aw2013.c +++ b/drivers/leds/leds-aw2013.c @@ -397,6 +397,7 @@ error_reg: regulator_disable(chip->vcc_regulator); error: + mutex_unlock(&chip->mutex); mutex_destroy(&chip->mutex); return ret; } -- GitLab From dd7f2d0f6187da4655820d4daf28fe935c79c423 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 17 Feb 2024 20:11:30 +0100 Subject: [PATCH 1005/1418] leds: sgm3140: Add missing timer cleanup and flash gpio control [ Upstream commit 205c29887a333ee4b37596e6533373e38cb23947 ] Enabling strobe and then setting brightness to 0 causes the driver to enter invalid state after strobe end timer fires. We should cancel strobe mode resources when changing brightness (aka torch mode). Fixes: cef8ec8cbd21 ("leds: add sgm3140 driver") Signed-off-by: Ondrej Jirman Link: https://lore.kernel.org/r/20240217191133.1757553-1-megi@xff.cz Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/flash/leds-sgm3140.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/leds/flash/leds-sgm3140.c b/drivers/leds/flash/leds-sgm3140.c index d3a30ad94ac46..dd5d327c52a10 100644 --- a/drivers/leds/flash/leds-sgm3140.c +++ b/drivers/leds/flash/leds-sgm3140.c @@ -114,8 +114,11 @@ static int sgm3140_brightness_set(struct led_classdev *led_cdev, "failed to enable regulator: %d\n", ret); return ret; } + gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 1); } else { + del_timer_sync(&priv->powerdown_timer); + gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 0); ret = regulator_disable(priv->vin_regulator); if (ret) { -- GitLab From 491ec4f4544373277b4e7ca35525d0d167d6c8a2 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 20 Feb 2024 00:11:19 +0100 Subject: [PATCH 1006/1418] backlight: lm3630a: Initialize backlight_properties on init [ Upstream commit ad9aeb0e3aa90ebdad5fabf9c21783740eb95907 ] The backlight_properties struct should be initialized to zero before using, otherwise there will be some random values in the struct. Fixes: 0c2a665a648e ("backlight: add Backlight driver for lm3630 chip") Signed-off-by: Luca Weiss Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220-lm3630a-fixups-v1-1-9ca62f7e4a33@z3ntu.xyz Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3630a_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 475f35635bf67..855c275754578 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -339,6 +339,7 @@ static int lm3630a_backlight_register(struct lm3630a_chip *pchip) struct backlight_properties props; const char *label; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; if (pdata->leda_ctrl != LM3630A_LEDA_DISABLE) { props.brightness = pdata->leda_init_brt; -- GitLab From d6e321dcd7d42c492b305db50376bfa070b9b0cd Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 20 Feb 2024 00:11:20 +0100 Subject: [PATCH 1007/1418] backlight: lm3630a: Don't set bl->props.brightness in get_brightness [ Upstream commit 4bf7ddd2d2f0f8826f25f74c7eba4e2c323a1446 ] There's no need to set bl->props.brightness, the get_brightness function is just supposed to return the current brightness and not touch the struct. With that done we can also remove the 'goto out' and just return the value. Fixes: 0c2a665a648e ("backlight: add Backlight driver for lm3630 chip") Signed-off-by: Luca Weiss Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220-lm3630a-fixups-v1-2-9ca62f7e4a33@z3ntu.xyz Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3630a_bl.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 855c275754578..0d43f6326750f 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -231,7 +231,7 @@ static int lm3630a_bank_a_get_brightness(struct backlight_device *bl) if (rval < 0) goto out_i2c_err; brightness |= rval; - goto out; + return brightness; } /* disable sleep */ @@ -242,11 +242,8 @@ static int lm3630a_bank_a_get_brightness(struct backlight_device *bl) rval = lm3630a_read(pchip, REG_BRT_A); if (rval < 0) goto out_i2c_err; - brightness = rval; + return rval; -out: - bl->props.brightness = brightness; - return bl->props.brightness; out_i2c_err: dev_err(pchip->dev, "i2c failed to access register\n"); return 0; @@ -306,7 +303,7 @@ static int lm3630a_bank_b_get_brightness(struct backlight_device *bl) if (rval < 0) goto out_i2c_err; brightness |= rval; - goto out; + return brightness; } /* disable sleep */ @@ -317,11 +314,8 @@ static int lm3630a_bank_b_get_brightness(struct backlight_device *bl) rval = lm3630a_read(pchip, REG_BRT_B); if (rval < 0) goto out_i2c_err; - brightness = rval; + return rval; -out: - bl->props.brightness = brightness; - return bl->props.brightness; out_i2c_err: dev_err(pchip->dev, "i2c failed to access register\n"); return 0; -- GitLab From 6d7e897358fc1f15d25490376d9d4a3a57bc9782 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 20 Feb 2024 15:35:24 +0000 Subject: [PATCH 1008/1418] backlight: da9052: Fully initialize backlight_properties during probe [ Upstream commit 0285e9efaee8276305db5c52a59baf84e9731556 ] props is stack allocated and the fields that are not explcitly set by the probe function need to be zeroed or we'll get undefined behaviour (especially so power/blank states)! Fixes: 6ede3d832aaa ("backlight: add driver for DA9052/53 PMIC v1") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220153532.76613-2-daniel.thompson@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/da9052_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/da9052_bl.c b/drivers/video/backlight/da9052_bl.c index 882359dd288c0..aa00379392a0f 100644 --- a/drivers/video/backlight/da9052_bl.c +++ b/drivers/video/backlight/da9052_bl.c @@ -117,6 +117,7 @@ static int da9052_backlight_probe(struct platform_device *pdev) wleds->led_reg = platform_get_device_id(pdev)->driver_data; wleds->state = DA9052_WLEDS_OFF; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = DA9052_MAX_BRIGHTNESS; -- GitLab From 693c1edfcf218814c6880e95b37496832446beb7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 20 Feb 2024 15:35:25 +0000 Subject: [PATCH 1009/1418] backlight: lm3639: Fully initialize backlight_properties during probe [ Upstream commit abb5a5d951fbea3feb5c4ba179b89bb96a1d3462 ] props is stack allocated and the fields that are not explcitly set by the probe function need to be zeroed or we'll get undefined behaviour (especially so power/blank states)! Fixes: 0f59858d5119 ("backlight: add new lm3639 backlight driver") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220153532.76613-3-daniel.thompson@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3639_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/lm3639_bl.c b/drivers/video/backlight/lm3639_bl.c index 6580911671a3e..4c9726a7fa720 100644 --- a/drivers/video/backlight/lm3639_bl.c +++ b/drivers/video/backlight/lm3639_bl.c @@ -339,6 +339,7 @@ static int lm3639_probe(struct i2c_client *client, } /* backlight */ + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.brightness = pdata->init_brt_led; props.max_brightness = pdata->max_brt_led; -- GitLab From 765f673c0eb9115d08689c89a1cc1acf1c2992c7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 20 Feb 2024 15:35:26 +0000 Subject: [PATCH 1010/1418] backlight: lp8788: Fully initialize backlight_properties during probe [ Upstream commit 392346827fbe8a7fd573dfb145170d7949f639a6 ] props is stack allocated and the fields that are not explcitly set by the probe function need to be zeroed or we'll get undefined behaviour (especially so power/blank states)! Fixes: c5a51053cf3b ("backlight: add new lp8788 backlight driver") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220153532.76613-4-daniel.thompson@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lp8788_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c index ba42f3fe0c739..d9b95dbd40d30 100644 --- a/drivers/video/backlight/lp8788_bl.c +++ b/drivers/video/backlight/lp8788_bl.c @@ -191,6 +191,7 @@ static int lp8788_backlight_register(struct lp8788_bl *bl) int init_brt; char *name; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_PLATFORM; props.max_brightness = MAX_BRIGHTNESS; -- GitLab From faa2ba4cebdfc58f583cad52348d0a491d6df6f8 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 24 Feb 2024 18:42:28 +0100 Subject: [PATCH 1011/1418] sparc32: Fix section mismatch in leon_pci_grpci [ Upstream commit 24338a6ae13cb743ced77da1b3a12c83f08a0c96 ] Passing a datastructre marked _initconst to platform_driver_register() is wrong. Drop the __initconst notation. This fixes the following warnings: WARNING: modpost: vmlinux: section mismatch in reference: grpci1_of_driver+0x30 (section: .data) -> grpci1_of_match (section: .init.rodata) WARNING: modpost: vmlinux: section mismatch in reference: grpci2_of_driver+0x30 (section: .data) -> grpci2_of_match (section: .init.rodata) Signed-off-by: Sam Ravnborg Cc: "David S. Miller" Cc: Andreas Larsson Fixes: 4154bb821f0b ("sparc: leon: grpci1: constify of_device_id") Fixes: 03949b1cb9f1 ("sparc: leon: grpci2: constify of_device_id") Tested-by: Randy Dunlap # build-tested Reviewed-by: Andreas Larsson Tested-by: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240224-sam-fix-sparc32-all-builds-v2-7-1f186603c5c4@ravnborg.org Signed-off-by: Sasha Levin --- arch/sparc/kernel/leon_pci_grpci1.c | 2 +- arch/sparc/kernel/leon_pci_grpci2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index e6935d0ac1ec9..c32590bdd3120 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -696,7 +696,7 @@ err1: return err; } -static const struct of_device_id grpci1_of_match[] __initconst = { +static const struct of_device_id grpci1_of_match[] = { { .name = "GAISLER_PCIFBRG", }, diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index ca22f93d90454..dd06abc61657f 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -887,7 +887,7 @@ err1: return err; } -static const struct of_device_id grpci2_of_match[] __initconst = { +static const struct of_device_id grpci2_of_match[] = { { .name = "GAISLER_GRPCI2", }, -- GitLab From a8b2b26fdd011ebe36d68a9a321ca45801685959 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Sat, 2 Mar 2024 00:52:14 +0000 Subject: [PATCH 1012/1418] clk: Fix clk_core_get NULL dereference [ Upstream commit e97fe4901e0f59a0bfd524578fe3768f8ca42428 ] It is possible for clk_core_get to dereference a NULL in the following sequence: clk_core_get() of_clk_get_hw_from_clkspec() __of_clk_get_hw_from_provider() __clk_get_hw() __clk_get_hw() can return NULL which is dereferenced by clk_core_get() at hw->core. Prior to commit dde4eff47c82 ("clk: Look for parents with clkdev based clk_lookups") the check IS_ERR_OR_NULL() was performed which would have caught the NULL. Reading the description of this function it talks about returning NULL but that cannot be so at the moment. Update the function to check for hw before dereferencing it and return NULL if hw is NULL. Fixes: dde4eff47c82 ("clk: Look for parents with clkdev based clk_lookups") Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20240302-linux-next-24-03-01-simple-clock-fixes-v1-1-25f348a5982b@linaro.org Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 33fedbd096f33..9004e07182259 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -407,6 +407,9 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) if (IS_ERR(hw)) return ERR_CAST(hw); + if (!hw) + return NULL; + return hw->core; } -- GitLab From 8c4889a9ea861d7be37463c10846eb75e1b49c9d Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 1 Mar 2024 16:44:37 +0800 Subject: [PATCH 1013/1418] clk: zynq: Prevent null pointer dereference caused by kmalloc failure [ Upstream commit 7938e9ce39d6779d2f85d822cc930f73420e54a6 ] The kmalloc() in zynq_clk_setup() will return null if the physical memory has run out. As a result, if we use snprintf() to write data to the null address, the null pointer dereference bug will happen. This patch uses a stack variable to replace the kmalloc(). Fixes: 0ee52b157b8e ("clk: zynq: Add clock controller driver") Suggested-by: Michal Simek Suggested-by: Stephen Boyd Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20240301084437.16084-1-duoming@zju.edu.cn Acked-by: Michal Simek Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/zynq/clkc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c index 7bdeaff2bfd68..c28d3dacf0fb2 100644 --- a/drivers/clk/zynq/clkc.c +++ b/drivers/clk/zynq/clkc.c @@ -42,6 +42,7 @@ static void __iomem *zynq_clkc_base; #define SLCR_SWDT_CLK_SEL (zynq_clkc_base + 0x204) #define NUM_MIO_PINS 54 +#define CLK_NAME_LEN 16 #define DBG_CLK_CTRL_CLKACT_TRC BIT(0) #define DBG_CLK_CTRL_CPU_1XCLKACT BIT(1) @@ -215,7 +216,7 @@ static void __init zynq_clk_setup(struct device_node *np) int i; u32 tmp; int ret; - char *clk_name; + char clk_name[CLK_NAME_LEN]; unsigned int fclk_enable = 0; const char *clk_output_name[clk_max]; const char *cpu_parents[4]; @@ -426,12 +427,10 @@ static void __init zynq_clk_setup(struct device_node *np) "gem1_emio_mux", CLK_SET_RATE_PARENT, SLCR_GEM1_CLK_CTRL, 0, 0, &gem1clk_lock); - tmp = strlen("mio_clk_00x"); - clk_name = kmalloc(tmp, GFP_KERNEL); for (i = 0; i < NUM_MIO_PINS; i++) { int idx; - snprintf(clk_name, tmp, "mio_clk_%2.2d", i); + snprintf(clk_name, CLK_NAME_LEN, "mio_clk_%2.2d", i); idx = of_property_match_string(np, "clock-names", clk_name); if (idx >= 0) can_mio_mux_parents[i] = of_clk_get_parent_name(np, @@ -439,7 +438,6 @@ static void __init zynq_clk_setup(struct device_node *np) else can_mio_mux_parents[i] = dummy_nm; } - kfree(clk_name); clk_register_mux(NULL, "can_mux", periph_parents, 4, CLK_SET_RATE_NO_REPARENT, SLCR_CAN_CLK_CTRL, 4, 2, 0, &canclk_lock); -- GitLab From fd58b4e3c0c68ce51bdcf85adbe0b21e768b7088 Mon Sep 17 00:00:00 2001 From: Athaariq Ardhiansyah Date: Sun, 10 Mar 2024 20:58:44 +0700 Subject: [PATCH 1014/1418] ALSA: hda/realtek: fix ALC285 issues on HP Envy x360 laptops [ Upstream commit c062166995c9e57d5cd508b332898f79da319802 ] Realtek codec on HP Envy laptop series are heavily modified by vendor. Therefore, need intervention to make it work properly. The patch fixes: - B&O soundbar speakers (between lid and keyboard) activation - Enable LED on mute button - Add missing process coefficient which affects the output amplifier - Volume control synchronization between B&O soundbar and side speakers - Unmute headset output on several HP Envy models - Auto-enable headset mic when plugged This patch was tested on HP Envy x360 13-AR0107AU with Realtek ALC285 The only unsolved problem is output amplifier of all built-in speakers is too weak, which causes volume of built-in speakers cannot be loud as vendor's proprietary driver due to missing _DSD parameter in the firmware. The solution is currently on research. Expected to has another patch in the future. Potential fix to related issues, need test before close those issues: - https://bugzilla.kernel.org/show_bug.cgi?id=189331 - https://bugzilla.kernel.org/show_bug.cgi?id=216632 - https://bugzilla.kernel.org/show_bug.cgi?id=216311 - https://bugzilla.kernel.org/show_bug.cgi?id=213507 Signed-off-by: Athaariq Ardhiansyah Message-ID: <20240310140249.3695-1-foss@athaariq.my.id> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ede3f8b273d79..6e759032eba2e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6693,6 +6693,60 @@ static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, } } +static void alc285_fixup_hp_envy_x360(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + static const struct coef_fw coefs[] = { + WRITE_COEF(0x08, 0x6a0c), WRITE_COEF(0x0d, 0xa023), + WRITE_COEF(0x10, 0x0320), WRITE_COEF(0x1a, 0x8c03), + WRITE_COEF(0x25, 0x1800), WRITE_COEF(0x26, 0x003a), + WRITE_COEF(0x28, 0x1dfe), WRITE_COEF(0x29, 0xb014), + WRITE_COEF(0x2b, 0x1dfe), WRITE_COEF(0x37, 0xfe15), + WRITE_COEF(0x38, 0x7909), WRITE_COEF(0x45, 0xd489), + WRITE_COEF(0x46, 0x00f4), WRITE_COEF(0x4a, 0x21e0), + WRITE_COEF(0x66, 0x03f0), WRITE_COEF(0x67, 0x1000), + WRITE_COEF(0x6e, 0x1005), { } + }; + + static const struct hda_pintbl pincfgs[] = { + { 0x12, 0xb7a60130 }, /* Internal microphone*/ + { 0x14, 0x90170150 }, /* B&O soundbar speakers */ + { 0x17, 0x90170153 }, /* Side speakers */ + { 0x19, 0x03a11040 }, /* Headset microphone */ + { } + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_apply_pincfgs(codec, pincfgs); + + /* Fixes volume control problem for side speakers */ + alc295_fixup_disable_dac3(codec, fix, action); + + /* Fixes no sound from headset speaker */ + snd_hda_codec_amp_stereo(codec, 0x21, HDA_OUTPUT, 0, -1, 0); + + /* Auto-enable headset mic when plugged */ + snd_hda_jack_set_gating_jack(codec, 0x19, 0x21); + + /* Headset mic volume enhancement */ + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREF50); + break; + case HDA_FIXUP_ACT_INIT: + alc_process_coef_fw(codec, coefs); + break; + case HDA_FIXUP_ACT_BUILD: + rename_ctl(codec, "Bass Speaker Playback Volume", + "B&O-Tuned Playback Volume"); + rename_ctl(codec, "Front Playback Switch", + "B&O Soundbar Playback Switch"); + rename_ctl(codec, "Bass Speaker Playback Switch", + "Side Speaker Playback Switch"); + break; + } +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -7131,6 +7185,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, @@ -9054,6 +9109,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_ENVY_X360] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_envy_x360, + .chained = true, + .chain_id = ALC285_FIXUP_HP_GPIO_AMP_INIT, + }, [ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_ideapad_s740_coef, @@ -9595,6 +9656,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), @@ -10249,6 +10311,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, -- GitLab From 629af0d5fe94a35f498ba2c3f19bd78bfa591be6 Mon Sep 17 00:00:00 2001 From: Johan Carlsson Date: Wed, 13 Mar 2024 09:15:09 +0100 Subject: [PATCH 1015/1418] ALSA: usb-audio: Stop parsing channels bits when all channels are found. [ Upstream commit a39d51ff1f52cd0b6fe7d379ac93bd8b4237d1b7 ] If a usb audio device sets more bits than the amount of channels it could write outside of the map array. Signed-off-by: Johan Carlsson Fixes: 04324ccc75f9 ("ALSA: usb-audio: add channel map support") Message-ID: <20240313081509.9801-1-johan.carlsson@teenage.engineering> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/stream.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 3d4add94e367d..d5409f3879455 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -300,9 +300,12 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, c = 0; if (bits) { - for (; bits && *maps; maps++, bits >>= 1) + for (; bits && *maps; maps++, bits >>= 1) { if (bits & 1) chmap->map[c++] = *maps; + if (c == chmap->channels) + break; + } } else { /* If we're missing wChannelConfig, then guess something to make sure the channel map is not skipped entirely */ -- GitLab From 0606bedcc373696c4c7d637dc30ed50725d184f5 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 25 Jul 2023 10:55:24 -0500 Subject: [PATCH 1016/1418] RDMA/irdma: Allow accurate reporting on QP max send/recv WR [ Upstream commit 3a8498720450174b8db450d3375a04dca81b3534 ] Currently the attribute cap.max_send_wr and cap.max_recv_wr sent from user-space during create QP are the provider computed SQ/RQ depth as opposed to raw values passed from application. This inhibits computation of an accurate value for max_send_wr and max_recv_wr for this QP in the kernel which matches the value returned in user create QP. Also these capabilities needs to be reported from the driver in query QP. Add support by extending the ABI to allow the raw cap.max_send_wr and cap.max_recv_wr to be passed from user-space, while keeping compatibility for the older scheme. The internal HW depth and shift needed for the WQs needs to be computed now for both kernel and user-mode QPs. Add new helpers to assist with this: irdma_uk_calc_depth_shift_sq, irdma_uk_calc_depth_shift_rq and irdma_uk_calc_depth_shift_wq. Consolidate all the user mode QP setup into a new function irdma_setup_umode_qp which keeps it with its counterpart irdma_setup_kmode_qp. Signed-off-by: Youvaraj Sagar Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155525.1081-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky Stable-dep-of: 926e8ea4b8da ("RDMA/irdma: Remove duplicate assignment") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/uk.c | 89 +++++++++++--- drivers/infiniband/hw/irdma/user.h | 10 ++ drivers/infiniband/hw/irdma/verbs.c | 182 +++++++++++++++++----------- drivers/infiniband/hw/irdma/verbs.h | 3 +- include/uapi/rdma/irdma-abi.h | 6 + 5 files changed, 203 insertions(+), 87 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 280d633d4ec4f..d691cdef5e9a3 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1414,6 +1414,78 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } +/** + * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ + * @ukinfo: qp initialization info + * @sq_shift: Returns shift of SQ + * @rq_shift: Returns shift of RQ + */ +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } +} + +/** + * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. + * @ukinfo: qp initialization info + * @sq_depth: Returns depth of SQ + * @sq_shift: Returns shift of SQ + */ +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, + *sq_shift, sq_depth); + + return status; +} + +/** + * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. + * @ukinfo: qp initialization info + * @rq_depth: Returns depth of RQ + * @rq_shift: Returns shift of RQ + */ +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift) +{ + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } + + status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, + *rq_shift, rq_depth); + + return status; +} + /** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) @@ -1428,23 +1500,12 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; - u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return -EINVAL; - irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); - if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt, - info->max_inline_data, &sqshift); - if (info->abi_ver > 4) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - } else { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt + 1, - info->max_inline_data, &sqshift); - } qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; @@ -1458,7 +1519,7 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->sq_size = info->sq_size; qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; - sq_ring_size = qp->sq_size << sqshift; + sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { @@ -1473,9 +1534,9 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; - qp->rq_wqe_size = rqshift; + qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); - qp->rq_wqe_size_multiplier = 1 << rqshift; + qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index d0cdf609f5e06..1e0e1a71dbada 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -295,6 +295,12 @@ void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift); +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift); +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -374,8 +380,12 @@ struct irdma_qp_uk_init_info { u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; + u32 sq_depth; + u32 rq_depth; u8 first_sq_wq; u8 type; + u8 sq_shift; + u8 rq_shift; int abi_ver; bool legacy_mode; }; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 42c671f209233..bb423849968d9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -277,7 +277,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); - struct irdma_uk_attrs *uk_attrs; + struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) @@ -292,7 +292,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; - uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; + if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) + ucontext->use_raw_attrs = true; + /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) @@ -327,6 +329,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); @@ -566,6 +569,86 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, } } +/** + * irdma_setup_umode_qp - setup sq and rq size in user mode qp + * @iwdev: iwarp device + * @iwqp: qp ptr (user or kernel) + * @info: initialize info to return + * @init_attr: Initial QP create attributes + */ +static int irdma_setup_umode_qp(struct ib_udata *udata, + struct irdma_device *iwdev, + struct irdma_qp *iwqp, + struct irdma_qp_init_info *info, + struct ib_qp_init_attr *init_attr) +{ + struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct irdma_ucontext, ibucontext); + struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; + struct irdma_create_qp_req req; + unsigned long flags; + int ret; + + ret = ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen)); + if (ret) { + ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n"); + return ret; + } + + iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; + iwqp->user_mode = 1; + if (req.user_wqe_bufs) { + info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; + spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); + iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); + + if (!iwqp->iwpbl) { + ret = -ENODATA; + ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); + return ret; + } + } + + if (!ucontext->use_raw_attrs) { + /** + * Maintain backward compat with older ABI which passes sq and + * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. + * There is no way to compute the correct value of + * iwqp->max_send_wr/max_recv_wr in the kernel. + */ + iwqp->max_send_wr = init_attr->cap.max_send_wr; + iwqp->max_recv_wr = init_attr->cap.max_recv_wr; + ukinfo->sq_size = init_attr->cap.max_send_wr; + ukinfo->rq_size = init_attr->cap.max_recv_wr; + irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, + &ukinfo->rq_shift); + } else { + ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); + if (ret) + return ret; + + ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); + if (ret) + return ret; + + iwqp->max_send_wr = + (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = + (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; + } + + irdma_setup_virt_qp(iwdev, iwqp, info); + + return 0; +} + /** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device @@ -579,40 +662,28 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; - u32 sqdepth, rqdepth; - u8 sqshift, rqshift; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; - irdma_get_wqe_shift(uk_attrs, - uk_attrs->hw_rev >= IRDMA_GEN_2 ? ukinfo->max_sq_frag_cnt + 1 : - ukinfo->max_sq_frag_cnt, - ukinfo->max_inline_data, &sqshift); - status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, - &sqdepth); + status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); if (status) return status; - if (uk_attrs->hw_rev == IRDMA_GEN_1) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - else - irdma_get_wqe_shift(uk_attrs, ukinfo->max_rq_frag_cnt, 0, - &rqshift); - - status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, - &rqdepth); + status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = - kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = - kcalloc(rqdepth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; @@ -622,7 +693,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; - size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE; + size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = ALIGN(size, 256); @@ -638,16 +709,19 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq = mem->va; info->sq_pa = mem->pa; - ukinfo->rq = &ukinfo->sq[sqdepth]; - info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->shadow_area = ukinfo->rq[rqdepth].elem; - info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->sq_size = sqdepth >> sqshift; - ukinfo->rq_size = rqdepth >> rqshift; + ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; + info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; + info->shadow_area_pa = + info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; ukinfo->qp_id = iwqp->ibqp.qp_num; - init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift; - init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift; + iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + init_attr->cap.max_send_wr = iwqp->max_send_wr; + init_attr->cap.max_recv_wr = iwqp->max_recv_wr; return 0; } @@ -805,18 +879,14 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; int err_code; - int sq_size; - int rq_size; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {}; struct irdma_qp_host_ctx_info *ctx_info; - unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) @@ -826,13 +896,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return -EINVAL; - sq_size = init_attr->cap.max_send_wr; - rq_size = init_attr->cap.max_recv_wr; - init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; - init_info.qp_uk_init_info.sq_size = sq_size; - init_info.qp_uk_init_info.rq_size = rq_size; + init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; + init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; @@ -882,36 +949,9 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_waitqueue_head(&iwqp->mod_qp_waitq); if (udata) { - err_code = ib_copy_from_udata(&req, udata, - min(sizeof(req), udata->inlen)); - if (err_code) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: ib_copy_from_data fail\n"); - goto error; - } - - iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - iwqp->user_mode = 1; - if (req.user_wqe_bufs) { - struct irdma_ucontext *ucontext = - rdma_udata_to_drv_context(udata, - struct irdma_ucontext, - ibucontext); - - init_info.qp_uk_init_info.legacy_mode = ucontext->legacy_mode; - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - - if (!iwqp->iwpbl) { - err_code = -ENODATA; - ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); - goto error; - } - } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - irdma_setup_virt_qp(iwdev, iwqp, &init_info); + err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, + init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; @@ -966,8 +1006,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; rf->qp_table[qp_num] = iwqp; - iwqp->max_send_wr = sq_size; - iwqp->max_recv_wr = rq_size; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { @@ -988,8 +1026,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } - uresp.actual_sq_size = sq_size; - uresp.actual_rq_size = rq_size; + uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; + uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 9f9e273bbff3e..0bc0d0faa0868 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -18,7 +18,8 @@ struct irdma_ucontext { struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ int abi_ver; - bool legacy_mode; + u8 legacy_mode : 1; + u8 use_raw_attrs : 1; }; struct irdma_pd { diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index a7085e092d348..3a0cde4dcf331 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -22,10 +22,15 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_CQ = 2, }; +enum { + IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, +}; + struct irdma_alloc_ucontext_req { __u32 rsvd32; __u8 userspace_ver; __u8 rsvd8[3]; + __aligned_u64 comp_mask; }; struct irdma_alloc_ucontext_resp { @@ -46,6 +51,7 @@ struct irdma_alloc_ucontext_resp { __u16 max_hw_sq_chunk; __u8 hw_rev; __u8 rsvd2; + __aligned_u64 comp_mask; }; struct irdma_alloc_pd_resp { -- GitLab From ae771b805dfd6f09c0c43a5e85056ce8cfc77d42 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:39:53 -0600 Subject: [PATCH 1017/1418] RDMA/irdma: Remove duplicate assignment [ Upstream commit 926e8ea4b8dac84f6d14a4b60d0653f1f2ba9431 ] Remove the unneeded assignment of the qp_num which is already set in irdma_create_qp(). Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233953.400483-1-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index bb423849968d9..76c5f461faca0 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -716,7 +716,6 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; - ukinfo->qp_id = iwqp->ibqp.qp_num; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; @@ -941,7 +940,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; init_info.pd = &iwpd->sc_pd; - init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; + init_info.qp_uk_init_info.qp_id = qp_num; if (!rdma_protocol_roce(&iwdev->ibdev, 1)) init_info.qp_uk_init_info.first_sq_wq = 1; iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; -- GitLab From e362d007294955a4fb929e1c8978154a64efdcb6 Mon Sep 17 00:00:00 2001 From: William Kucharski Date: Fri, 2 Feb 2024 02:15:49 -0700 Subject: [PATCH 1018/1418] RDMA/srpt: Do not register event handler until srpt device is fully setup [ Upstream commit c21a8870c98611e8f892511825c9607f1e2cd456 ] Upon rare occasions, KASAN reports a use-after-free Write in srpt_refresh_port(). This seems to be because an event handler is registered before the srpt device is fully setup and a race condition upon error may leave a partially setup event handler in place. Instead, only register the event handler after srpt device initialization is complete. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: William Kucharski Link: https://lore.kernel.org/r/20240202091549.991784-2-william.kucharski@oracle.com Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index cffa93f114a73..fd6c260d5857d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3209,7 +3209,6 @@ static int srpt_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); - ib_register_event_handler(&sdev->event_handler); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; @@ -3232,6 +3231,7 @@ static int srpt_add_one(struct ib_device *device) } } + ib_register_event_handler(&sdev->event_handler); spin_lock(&srpt_dev_lock); list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); @@ -3242,7 +3242,6 @@ static int srpt_add_one(struct ib_device *device) err_port: srpt_unregister_mad_agent(sdev, i); - ib_unregister_event_handler(&sdev->event_handler); err_cm: if (sdev->cm_id) ib_destroy_cm_id(sdev->cm_id); -- GitLab From 492acea36b29499ae0348e033073ec21d6cb2746 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Feb 2023 15:04:56 +0800 Subject: [PATCH 1019/1418] f2fs: reduce stack memory cost by using bitfield in struct f2fs_io_info [ Upstream commit 2eae077e6e46f9046d383631145750e043820dce ] This patch tries to use bitfield in struct f2fs_io_info to improve memory usage. struct f2fs_io_info { ... unsigned int need_lock:8; /* indicate we need to lock cp_rwsem */ unsigned int version:8; /* version of the node */ unsigned int submitted:1; /* indicate IO submission */ unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ unsigned int retry:1; /* need to reallocate block address */ unsigned int encrypted:1; /* indicate file is encrypted */ unsigned int post_read:1; /* require post read */ ... }; After this patch, size of struct f2fs_io_info reduces from 136 to 120. [Nathan: fix a compile warning (single-bit-bitfield-constant-conversion)] Signed-off-by: Nathan Chancellor Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 8a430dd49e9c ("f2fs: compress: fix to guarantee persisting compressed blocks by CP") Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 6 +++--- fs/f2fs/compress.c | 5 +++-- fs/f2fs/data.c | 10 +++++----- fs/f2fs/f2fs.h | 18 +++++++++--------- fs/f2fs/gc.c | 8 ++++---- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 6 +++--- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index eb4d69f53337f..3ec203bbd5593 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -70,7 +70,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_por = !is_meta, + .is_por = !is_meta ? 1 : 0, }; int err; @@ -234,8 +234,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op = REQ_OP_READ, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, - .in_list = false, - .is_por = (type == META_POR), + .in_list = 0, + .is_por = (type == META_POR) ? 1 : 0, }; struct blk_plug plug; int err; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 967262c37da52..7d85db7208e14 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1249,10 +1249,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, - .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode), + .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? + 1 : 0, }; struct dnode_of_data dn; struct node_info ni; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8b561af379743..7b3eb192f9c03 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -982,7 +982,7 @@ next: bio_page = fio->page; /* set submitted = true as a return value */ - fio->submitted = true; + fio->submitted = 1; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -998,7 +998,7 @@ alloc_new: (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - fio->retry = true; + fio->retry = 1; goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_VECS); @@ -2776,10 +2776,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, - .submitted = false, + .submitted = 0, .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, - .post_read = f2fs_post_read_required(inode), + .post_read = f2fs_post_read_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, @@ -2902,7 +2902,7 @@ out: } if (submitted) - *submitted = fio.submitted ? 1 : 0; + *submitted = fio.submitted; return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e5a9498b89c06..1d78bca5037f4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1183,19 +1183,19 @@ struct f2fs_io_info { struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ - bool submitted; /* indicate IO submission */ - int need_lock; /* indicate we need to lock cp_rwsem */ - bool in_list; /* indicate fio is in io_list */ - bool is_por; /* indicate IO is from recovery or not */ - bool retry; /* need to reallocate block address */ - int compr_blocks; /* # of compressed block addresses */ - bool encrypted; /* indicate file is encrypted */ - bool post_read; /* require post read */ + unsigned int compr_blocks; /* # of compressed block addresses */ + unsigned int need_lock:8; /* indicate we need to lock cp_rwsem */ + unsigned int version:8; /* version of the node */ + unsigned int submitted:1; /* indicate IO submission */ + unsigned int in_list:1; /* indicate fio is in io_list */ + unsigned int is_por:1; /* indicate IO is from recovery or not */ + unsigned int retry:1; /* need to reallocate block address */ + unsigned int encrypted:1; /* indicate file is encrypted */ + unsigned int post_read:1; /* require post read */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ sector_t *last_block; /* last block number in bio */ - unsigned char version; /* version of the node */ }; struct bio_entry { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ec7212f7a9b73..8161355658562 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1187,8 +1187,8 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, - .in_list = false, - .retry = false, + .in_list = 0, + .retry = 0, }; int err; @@ -1276,8 +1276,8 @@ static int move_data_block(struct inode *inode, block_t bidx, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, - .in_list = false, - .retry = false, + .in_list = 0, + .retry = 0, }; struct dnode_of_data dn; struct f2fs_summary sum; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c6d0e07096326..5db6740d31364 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1587,7 +1587,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 16bf9d5c8d4f9..a3dabec1f216a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3312,10 +3312,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_bio_info *io; if (F2FS_IO_ALIGNED(sbi)) - fio->retry = false; + fio->retry = 0; INIT_LIST_HEAD(&fio->list); - fio->in_list = true; + fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -3396,7 +3396,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, - .in_list = false, + .in_list = 0, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) -- GitLab From e54cce8137258a550b49cae45d09e024821fb28d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:27 +0800 Subject: [PATCH 1020/1418] f2fs: compress: fix to guarantee persisting compressed blocks by CP [ Upstream commit 8a430dd49e9cb021372b0ad91e60aeef9c6ced00 ] If data block in compressed cluster is not persisted with metadata during checkpoint, after SPOR, the data may be corrupted, let's guarantee to write compressed page by checkpoint. Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 4 +++- fs/f2fs/data.c | 17 +++++++++-------- fs/f2fs/f2fs.h | 4 +++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7d85db7208e14..459bf10f297a2 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1437,6 +1437,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = (struct compress_io_ctx *)page_private(page); + enum count_type type = WB_DATA_TYPE(page, + f2fs_is_compressed_page(page)); int i; if (unlikely(bio->bi_status)) @@ -1444,7 +1446,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) f2fs_compress_free_page(page); - dec_page_count(sbi, F2FS_WB_DATA); + dec_page_count(sbi, type); if (atomic_dec_return(&cic->pending_pages)) return; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7b3eb192f9c03..6f649a60859fc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -50,7 +50,7 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -static bool __is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; @@ -67,8 +67,6 @@ static bool __is_cp_guaranteed(struct page *page) S_ISDIR(inode->i_mode)) return true; - if (f2fs_is_compressed_page(page)) - return false; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || page_private_gcing(page)) return true; @@ -327,7 +325,7 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page); + enum count_type type = WB_DATA_TYPE(page, false); if (page_private_dummy(page)) { clear_page_private_dummy(page); @@ -733,7 +731,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page)); + __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -941,7 +939,7 @@ alloc_new: if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); - inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -955,6 +953,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; + enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -984,7 +983,8 @@ next: /* set submitted = true as a return value */ fio->submitted = 1; - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + type = WB_DATA_TYPE(bio_page, fio->compressed_page); + inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, @@ -997,7 +997,8 @@ alloc_new: if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + dec_page_count(sbi, WB_DATA_TYPE(bio_page, + fio->compressed_page)); fio->retry = 1; goto skip; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1d78bca5037f4..16dacf811481c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1067,7 +1067,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(p, f) \ + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -3761,6 +3762,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_bio(struct f2fs_sb_info *sbi, -- GitLab From 542c8b3c774a480bfd0804291a12f6f2391b0cd1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:28 +0800 Subject: [PATCH 1021/1418] f2fs: compress: fix to cover normal cluster write with cp_rwsem [ Upstream commit fd244524c2cf07b5f4c3fe8abd6a99225c76544b ] When we overwrite compressed cluster w/ normal cluster, we should not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data will be corrupted if partial blocks were persisted before CP & SPOR, due to cluster metadata wasn't updated atomically. Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 27 ++++++++++++++++++--------- fs/f2fs/data.c | 3 ++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 459bf10f297a2..553950962842a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1462,12 +1462,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) } static int f2fs_write_raw_pages(struct compress_ctx *cc, - int *submitted, + int *submitted_p, struct writeback_control *wbc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret, i; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + int submitted, compr_blocks, i; + int ret = 0; compr_blocks = f2fs_compressed_blocks(cc); @@ -1482,6 +1484,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (compr_blocks < 0) return compr_blocks; + /* overwrite compressed cluster w/ normal cluster */ + if (compr_blocks > 0) + f2fs_lock_op(sbi); + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; @@ -1506,7 +1512,7 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { @@ -1514,26 +1520,29 @@ continue_unlock: unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + ret = 0; /* * for quota file, just redirty left pages to * avoid deadlock caused by cluster update race * from foreground operation. */ if (IS_NOQUOTA(cc->inode)) - return 0; - ret = 0; + goto out; f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); goto retry_write; } - return ret; + goto out; } - *submitted += _submitted; + *submitted_p += submitted; } - f2fs_balance_fs(F2FS_M_SB(mapping), true); +out: + if (compr_blocks > 0) + f2fs_unlock_op(sbi); - return 0; + f2fs_balance_fs(sbi, true); + return ret; } int f2fs_write_multi_pages(struct compress_ctx *cc, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6f649a60859fc..3f0ba71451c27 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2779,7 +2779,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, - .need_lock = LOCK_RETRY, + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, .post_read = f2fs_post_read_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, @@ -2859,6 +2859,7 @@ write: if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { + f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } -- GitLab From b5ecf59fbab99edb06a7387faa7949be631046e6 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 13 Jan 2024 03:41:29 +0800 Subject: [PATCH 1022/1418] f2fs: compress: fix to check unreleased compressed cluster [ Upstream commit eb8fbaa53374e0a2d4381190abfe708481517bbb ] Compressed cluster may not be released due to we can fail in release_compress_blocks(), fix to handle reserved compressed cluster correctly in reserve_compress_blocks(). Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Sheng Yong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 46e4960a9dcf7..6c9f03e3be8e9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3609,7 +3609,13 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) goto next; } - if (__is_valid_data_blkaddr(blkaddr)) { + /* + * compressed cluster was not released due to it + * fails in release_compress_blocks(), so NEW_ADDR + * is a possible case. + */ + if (blkaddr == NEW_ADDR || + __is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } @@ -3619,6 +3625,11 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) } reserved = cluster_size - compr_blocks; + + /* for the case all blocks in cluster were reserved */ + if (reserved == 1) + goto next; + ret = inc_valid_block_count(sbi, dn->inode, &reserved); if (ret) return ret; -- GitLab From 621da84f10154d47ab4d4d46ec2c7d4a0999b7c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Nov 2022 10:15:18 +0100 Subject: [PATCH 1023/1418] f2fs: simplify __allocate_data_block [ Upstream commit 3cf684f2f8e0229714fb6d051508b42d3320e78f ] Just use a simple if block for the conditional call to inc_valid_block_count. Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3f0ba71451c27..77b825fdeec85 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1420,13 +1420,12 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; dn->data_blkaddr = f2fs_data_blkaddr(dn); - if (dn->data_blkaddr != NULL_ADDR) - goto alloc; - - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) - return err; + if (dn->data_blkaddr == NULL_ADDR) { + err = inc_valid_block_count(sbi, dn->inode, &count); + if (unlikely(err)) + return err; + } -alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, -- GitLab From 17bfaa58db0c52f7e14210e76b3862162a66ae96 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:35 +0800 Subject: [PATCH 1024/1418] f2fs: delete obsolete FI_FIRST_BLOCK_WRITTEN [ Upstream commit a53936361330e4c55c0654605178281387d9c761 ] Commit 3c6c2bebef79 ("f2fs: avoid punch_hole overhead when releasing volatile data") introduced FI_FIRST_BLOCK_WRITTEN as below reason: This patch is to avoid some punch_hole overhead when releasing volatile data. If volatile data was not written yet, we just can make the first page as zero. After commit 7bc155fec5b3 ("f2fs: kill volatile write support"), we won't support volatile write, but it missed to remove obsolete FI_FIRST_BLOCK_WRITTEN, delete it in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 2 -- fs/f2fs/data.c | 2 -- fs/f2fs/f2fs.h | 6 ------ fs/f2fs/file.c | 3 --- fs/f2fs/gc.c | 2 -- fs/f2fs/inode.c | 25 ------------------------- 6 files changed, 40 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 553950962842a..5973fda2349c7 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1388,8 +1388,6 @@ unlock_continue: add_compr_block_stat(inode, cc->valid_nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); - if (cc->cluster_idx == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); if (quota_inode) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 77b825fdeec85..95e737fc75ef0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2739,8 +2739,6 @@ got_it: f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); out: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 16dacf811481c..2364b6f7500b2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,7 +764,6 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ @@ -3248,11 +3247,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline bool f2fs_is_first_block_written(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); -} - static inline bool f2fs_is_drop_cache(struct inode *inode) { return is_inode_flag_set(inode, FI_DROP_CACHE); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6c9f03e3be8e9..4d634b5b6011f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -602,9 +602,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) valid_blocks++; } - if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) - clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); - f2fs_invalidate_blocks(sbi, blkaddr); if (!released || blkaddr != COMPRESS_ADDR) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8161355658562..d4662ccb94c8f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1410,8 +1410,6 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0010579f17368..f0f2584fed66e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -74,20 +74,6 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static int __written_first_block(struct f2fs_sb_info *sbi, - struct f2fs_inode *ri) -{ - block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - - if (!__is_valid_data_blkaddr(addr)) - return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - return -EFSCORRUPTED; - } - return 0; -} - static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { int extra_size = get_extra_isize(inode); @@ -336,7 +322,6 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; - int err; /* Check if ino is within scope */ if (f2fs_check_nid_range(sbi, inode->i_ino)) @@ -417,16 +402,6 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (S_ISREG(inode->i_mode)) { - err = __written_first_block(sbi, ri); - if (err < 0) { - f2fs_put_page(node_page, 1); - return err; - } - if (!err) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); - } - if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; -- GitLab From 3c2e2c8059da54cab89bc6f7973cba821406a83e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:36 +0800 Subject: [PATCH 1025/1418] f2fs: delete obsolete FI_DROP_CACHE [ Upstream commit bb6e1c8fa5b9b95bbb8e39b6105f8f6550e070fc ] FI_DROP_CACHE was introduced in commit 1e84371ffeef ("f2fs: change atomic and volatile write policies") for volatile write feature, after commit 7bc155fec5b3 ("f2fs: kill volatile write support"), we won't support volatile write, let's delete related codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 3 --- fs/f2fs/f2fs.h | 6 ------ 2 files changed, 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 95e737fc75ef0..a5d0d0b892875 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2817,9 +2817,6 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, zero_user_segment(page, offset, PAGE_SIZE); write: - if (f2fs_is_drop_cache(inode)) - goto out; - /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2364b6f7500b2..0f7193a0ab422 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,7 +764,6 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ - FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ @@ -3247,11 +3246,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline bool f2fs_is_drop_cache(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_DROP_CACHE); -} - static inline void *inline_data_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); -- GitLab From ebe3a9f3c286ad376ed433b163c57462f939f18c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:37 +0800 Subject: [PATCH 1026/1418] f2fs: introduce get_dnode_addr() to clean up codes [ Upstream commit 2020cd48e41cb8470bb1ca0835033d13d3178425 ] Just cleanup, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 11 ++--------- fs/f2fs/f2fs.h | 18 +++++++++++++++--- fs/f2fs/file.c | 8 +------- fs/f2fs/inode.c | 32 ++++++++++++++------------------ 4 files changed, 32 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a5d0d0b892875..65b69d38f332c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1105,16 +1105,9 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, static void __set_data_blkaddr(struct dnode_of_data *dn) { - struct f2fs_node *rn = F2FS_NODE(dn->node_page); - __le32 *addr_array; - int base = 0; + __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - /* Get physical address of data block */ - addr_array = blkaddr_in_node(rn); - addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); + addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0f7193a0ab422..108ba10679da2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3246,12 +3246,13 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { - struct f2fs_inode *ri = F2FS_INODE(page); - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, page); - return (void *)&(ri->i_addr[extra_size + DEF_INLINE_RESERVED_SIZE]); + return (void *)(addr + DEF_INLINE_RESERVED_SIZE); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -3386,6 +3387,17 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + int base = 0; + + if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) + base = get_extra_isize(inode); + + return blkaddr_in_node(F2FS_NODE(node_page)) + base; +} + #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4d634b5b6011f..9a81d51ddf9e2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -560,20 +560,14 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; - int base = 0; bool compressed_cluster = false; int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + base + ofs; + addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; /* Assumption: truncateion starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f0f2584fed66e..869bb6ec107cc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -59,35 +59,31 @@ void f2fs_set_inode_flags(struct inode *inode) S_ENCRYPTED|S_VERITY|S_CASEFOLD); } -static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +static void __get_inode_rdev(struct inode *inode, struct page *node_page) { - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_page); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[extra_size]) - inode->i_rdev = old_decode_dev( - le32_to_cpu(ri->i_addr[extra_size])); + if (addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(addr[0])); else - inode->i_rdev = new_decode_dev( - le32_to_cpu(ri->i_addr[extra_size + 1])); + inode->i_rdev = new_decode_dev(le32_to_cpu(addr[1])); } } -static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +static void __set_inode_rdev(struct inode *inode, struct page *node_page) { - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_page); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[extra_size] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 1] = 0; + addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); + addr[1] = 0; } else { - ri->i_addr[extra_size] = 0; - ri->i_addr[extra_size + 1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 2] = 0; + addr[0] = 0; + addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); + addr[2] = 0; } } } @@ -400,7 +396,7 @@ static int do_read_inode(struct inode *inode) } /* get rdev by using inline_info */ - __get_inode_rdev(inode, ri); + __get_inode_rdev(inode, node_page); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; @@ -672,7 +668,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) } } - __set_inode_rdev(inode, ri); + __set_inode_rdev(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) -- GitLab From bba2a0ba00eb7244ca7a574bc13ff71321e9360d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:38 +0800 Subject: [PATCH 1027/1418] f2fs: update blkaddr in __set_data_blkaddr() for cleanup [ Upstream commit 59d0d4c3eae0f3dd8886ed59f89f21fa09e324f5 ] This patch allows caller to pass blkaddr to f2fs_set_data_blkaddr() and let __set_data_blkaddr() inside f2fs_set_data_blkaddr() to update dn->data_blkaddr w/ last value of blkaddr. Just cleanup, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 13 ++++++------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 12 ++++-------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 65b69d38f332c..1aa7d443cf364 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1103,10 +1103,11 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return 0; } -static void __set_data_blkaddr(struct dnode_of_data *dn) +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); + dn->data_blkaddr = blkaddr; addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } @@ -1116,18 +1117,17 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) * ->node_page * update block addresses in the node page */ -void f2fs_set_data_blkaddr(struct dnode_of_data *dn) +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, blkaddr); if (set_page_dirty(dn->node_page)) dn->node_changed = true; } void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - dn->data_blkaddr = blkaddr; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, blkaddr); f2fs_update_read_extent_cache(dn); } @@ -1154,8 +1154,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { - dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, NEW_ADDR); count--; } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 108ba10679da2..7f34c7d0d156e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3781,7 +3781,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, sector_t *sector); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void f2fs_set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9a81d51ddf9e2..aa3ded9825f0c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -585,8 +585,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, @@ -1488,8 +1487,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, } f2fs_invalidate_blocks(sbi, dn->data_blkaddr); - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NEW_ADDR); } f2fs_update_read_extent_cache_range(dn, start, 0, index - start); @@ -3440,8 +3438,7 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (blkaddr != NEW_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); @@ -3611,8 +3608,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) continue; } - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NEW_ADDR); } reserved = cluster_size - compr_blocks; -- GitLab From 67eba3e674a19e242b38cac032789cb509662884 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:30 +0800 Subject: [PATCH 1028/1418] f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode [ Upstream commit 54607494875edd636aff3c21ace3ad9a7da758a9 ] In reserve_compress_blocks(), we update blkaddrs of dnode in prior to inc_valid_block_count(), it may cause inconsistent status bewteen i_blocks and blkaddrs once inc_valid_block_count() fails. To fix this issue, it needs to reverse their invoking order. Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 26 ++++++++++++++------------ fs/f2fs/segment.c | 2 +- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1aa7d443cf364..b83b8ac29f430 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1142,7 +1142,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, @@ -1413,7 +1414,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr == NULL_ADDR) { - err = inc_valid_block_count(sbi, dn->inode, &count); + err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7f34c7d0d156e..2afb91471b535 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2286,7 +2286,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { blkcnt_t diff = 0, release = 0; block_t avail_user_block_count; @@ -2327,6 +2327,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = 0; } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + goto enospc; + } + diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) diff = *count; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index aa3ded9825f0c..96b59c87f30c7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3587,14 +3587,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) blkcnt_t reserved; int ret; - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { - blkaddr = f2fs_data_blkaddr(dn); + for (i = 0; i < cluster_size; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); if (i == 0) { - if (blkaddr == COMPRESS_ADDR) - continue; - dn->ofs_in_node += cluster_size; - goto next; + if (blkaddr != COMPRESS_ADDR) { + dn->ofs_in_node += cluster_size; + goto next; + } + continue; } /* @@ -3607,8 +3609,6 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) compr_blocks++; continue; } - - f2fs_set_data_blkaddr(dn, NEW_ADDR); } reserved = cluster_size - compr_blocks; @@ -3617,12 +3617,14 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (reserved == 1) goto next; - ret = inc_valid_block_count(sbi, dn->inode, &reserved); - if (ret) + ret = inc_valid_block_count(sbi, dn->inode, &reserved, false); + if (unlikely(ret)) return ret; - if (reserved != cluster_size - compr_blocks) - return -ENOSPC; + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + if (f2fs_data_blkaddr(dn) == NULL_ADDR) + f2fs_set_data_blkaddr(dn, NEW_ADDR); + } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a3dabec1f216a..aa1ba2fdfe00d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -247,7 +247,7 @@ retry: } else { blkcnt_t count = 1; - err = inc_valid_block_count(sbi, inode, &count); + err = inc_valid_block_count(sbi, inode, &count, true); if (err) { f2fs_put_dnode(&dn); return err; -- GitLab From 98df108a9ab068b7b6fb8085802eb596cf98b482 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Jan 2024 10:23:13 +0800 Subject: [PATCH 1029/1418] f2fs: compress: fix to cover f2fs_disable_compressed_file() w/ i_sem [ Upstream commit 2f9420d3a94aeebd92db88f00f4f2f1a3bd3f6cf ] - f2fs_disable_compressed_file - check inode_has_data - f2fs_file_mmap - mkwrite - f2fs_get_block_locked : update metadata in compressed inode's disk layout - fi->i_flags &= ~F2FS_COMPR_FL - clear_inode_flag(inode, FI_COMPRESSED_FILE); we should use i_sem lock to prevent above race case. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2afb91471b535..a81091a5e282d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4367,15 +4367,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - if (!f2fs_compressed_file(inode)) + f2fs_down_write(&F2FS_I(inode)->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + } + if (f2fs_is_mmap_file(inode) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return false; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; } -- GitLab From fe4de493572a4263554903bf9c3afc5c196e15f0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Jan 2024 22:49:15 +0800 Subject: [PATCH 1030/1418] f2fs: fix to avoid potential panic during recovery [ Upstream commit 21ec68234826b1b54ab980a8df6e33c74cfbee58 ] During recovery, if FAULT_BLOCK is on, it is possible that f2fs_reserve_new_block() will return -ENOSPC during recovery, then it may trigger panic. Also, if fault injection rate is 1 and only FAULT_BLOCK fault type is on, it may encounter deadloop in loop of block reservation. Let's change as below to fix these issues: - remove bug_on() to avoid panic. - limit the loop count of block reservation to avoid potential deadloop. Fixes: 956fa1ddc132 ("f2fs: fix to check return value of f2fs_reserve_new_block()") Reported-by: Zhiguo Niu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/recovery.c | 33 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a81091a5e282d..f5d69893d2d92 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -74,6 +74,11 @@ struct f2fs_fault_info { extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 53a6487f91e44..f5efc37a2b513 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -582,6 +582,19 @@ truncate_out: return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page) { @@ -683,14 +696,8 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; continue; @@ -698,16 +705,8 @@ retry_dn: /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } -- GitLab From e85d53ebae882791b33ff0cd88e0905ef6f253eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:05:00 +0100 Subject: [PATCH 1031/1418] scsi: csiostor: Avoid function pointer casts [ Upstream commit 9f3dbcb5632d6876226031d552ef6163bb3ad215 ] csiostor uses function pointer casts to keep the csio_ln_ev state machine hidden, but this causes warnings about control flow integrity (KCFI) violations in clang-16 and higher: drivers/scsi/csiostor/csio_lnode.c:1098:33: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1098 | return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/csiostor/csio_lnode.c:1369:29: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1369 | if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/csiostor/csio_lnode.c:1373:29: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1373 | if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/csiostor/csio_lnode.c:1377:29: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1377 | if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Move the enum into a shared header so the correct types can be used without the need for casts. Fixes: a3667aaed569 ("[SCSI] csiostor: Chelsio FCoE offload driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100518.457623-1-arnd@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/csiostor/csio_defs.h | 18 ++++++++++++++++-- drivers/scsi/csiostor/csio_lnode.c | 8 ++++---- drivers/scsi/csiostor/csio_lnode.h | 13 ------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/csiostor/csio_defs.h b/drivers/scsi/csiostor/csio_defs.h index c38017b4af982..e50e93e7fe5a1 100644 --- a/drivers/scsi/csiostor/csio_defs.h +++ b/drivers/scsi/csiostor/csio_defs.h @@ -73,7 +73,21 @@ csio_list_deleted(struct list_head *list) #define csio_list_prev(elem) (((struct list_head *)(elem))->prev) /* State machine */ -typedef void (*csio_sm_state_t)(void *, uint32_t); +struct csio_lnode; + +/* State machine evets */ +enum csio_ln_ev { + CSIO_LNE_NONE = (uint32_t)0, + CSIO_LNE_LINKUP, + CSIO_LNE_FAB_INIT_DONE, + CSIO_LNE_LINK_DOWN, + CSIO_LNE_DOWN_LINK, + CSIO_LNE_LOGO, + CSIO_LNE_CLOSE, + CSIO_LNE_MAX_EVENT, +}; + +typedef void (*csio_sm_state_t)(struct csio_lnode *ln, enum csio_ln_ev evt); struct csio_sm { struct list_head sm_list; @@ -83,7 +97,7 @@ struct csio_sm { static inline void csio_set_state(void *smp, void *state) { - ((struct csio_sm *)smp)->sm_state = (csio_sm_state_t)state; + ((struct csio_sm *)smp)->sm_state = state; } static inline void diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index d5ac938970232..5b3ffefae476d 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -1095,7 +1095,7 @@ csio_handle_link_down(struct csio_hw *hw, uint8_t portid, uint32_t fcfi, int csio_is_lnode_ready(struct csio_lnode *ln) { - return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)); + return (csio_get_state(ln) == csio_lns_ready); } /*****************************************************************************/ @@ -1366,15 +1366,15 @@ csio_free_fcfinfo(struct kref *kref) void csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str) { - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) { + if (csio_get_state(ln) == csio_lns_uninit) { strcpy(str, "UNINIT"); return; } - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) { + if (csio_get_state(ln) == csio_lns_ready) { strcpy(str, "READY"); return; } - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) { + if (csio_get_state(ln) == csio_lns_offline) { strcpy(str, "OFFLINE"); return; } diff --git a/drivers/scsi/csiostor/csio_lnode.h b/drivers/scsi/csiostor/csio_lnode.h index 372a67d122d38..607698a0f0631 100644 --- a/drivers/scsi/csiostor/csio_lnode.h +++ b/drivers/scsi/csiostor/csio_lnode.h @@ -53,19 +53,6 @@ extern int csio_fcoe_rnodes; extern int csio_fdmi_enable; -/* State machine evets */ -enum csio_ln_ev { - CSIO_LNE_NONE = (uint32_t)0, - CSIO_LNE_LINKUP, - CSIO_LNE_FAB_INIT_DONE, - CSIO_LNE_LINK_DOWN, - CSIO_LNE_DOWN_LINK, - CSIO_LNE_LOGO, - CSIO_LNE_CLOSE, - CSIO_LNE_MAX_EVENT, -}; - - struct csio_fcf_info { struct list_head list; uint8_t priority; -- GitLab From 987bc93ecdaba7710cfd44959dc194453e2e59d0 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Mon, 19 Feb 2024 14:18:05 +0800 Subject: [PATCH 1032/1418] RDMA/hns: Fix mis-modifying default congestion control algorithm [ Upstream commit d20a7cf9f714f0763efb56f0f2eeca1cb91315ed ] Commit 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN") adds a check of congest control alorithm for UD. But that patch causes a problem: hr_dev->caps.congest_type is global, used by all QPs, so modifying this field to DCQCN for UD QPs causes other QPs unable to use any other algorithm except DCQCN. Revert the modification in commit 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN"). Add a new field cong_type to struct hns_roce_qp and configure DCQCN for UD QPs. Fixes: 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN") Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Luoyouming Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20240219061805.668170-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_device.h | 17 +++++++++-------- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 ++++++++++------ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1112afa0af552..8748b65c87ea7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -595,6 +595,13 @@ struct hns_roce_work { u32 queue_num; }; +enum hns_roce_cong_type { + CONG_TYPE_DCQCN, + CONG_TYPE_LDCP, + CONG_TYPE_HC3, + CONG_TYPE_DIP, +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -639,6 +646,7 @@ struct hns_roce_qp { struct list_head sq_node; /* all send qps are on a list */ struct hns_user_mmap_entry *dwqe_mmap_entry; u32 config; + enum hns_roce_cong_type cong_type; }; struct hns_roce_ib_iboe { @@ -710,13 +718,6 @@ struct hns_roce_eq_table { struct hns_roce_eq *eq; }; -enum cong_type { - CONG_TYPE_DCQCN, - CONG_TYPE_LDCP, - CONG_TYPE_HC3, - CONG_TYPE_DIP, -}; - struct hns_roce_caps { u64 fw_ver; u8 num_ports; @@ -847,7 +848,7 @@ struct hns_roce_caps { u16 default_aeq_period; u16 default_aeq_arm_st; u16 default_ceq_arm_st; - enum cong_type cong_type; + enum hns_roce_cong_type cong_type; }; enum hns_roce_device_state { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 58fbb1d3b7f41..d06b19e69a151 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4886,12 +4886,15 @@ static int check_cong_type(struct ib_qp *ibqp, struct hns_roce_congestion_algorithm *cong_alg) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - if (ibqp->qp_type == IB_QPT_UD) - hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI) + hr_qp->cong_type = CONG_TYPE_DCQCN; + else + hr_qp->cong_type = hr_dev->caps.cong_type; /* different congestion types match different configurations */ - switch (hr_dev->caps.cong_type) { + switch (hr_qp->cong_type) { case CONG_TYPE_DCQCN: cong_alg->alg_sel = CONG_DCQCN; cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; @@ -4919,8 +4922,8 @@ static int check_cong_type(struct ib_qp *ibqp, default: ibdev_warn(&hr_dev->ib_dev, "invalid type(%u) for congestion selection.\n", - hr_dev->caps.cong_type); - hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + hr_qp->cong_type); + hr_qp->cong_type = CONG_TYPE_DCQCN; cong_alg->alg_sel = CONG_DCQCN; cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; cong_alg->dip_vld = DIP_INVALID; @@ -4939,6 +4942,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_congestion_algorithm cong_field; struct ib_device *ibdev = ibqp->device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); u32 dip_idx = 0; int ret; @@ -4951,7 +4955,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return ret; hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id + - hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE); + hr_qp->cong_type * HNS_ROCE_CONG_SIZE); hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID); hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel); hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL); -- GitLab From aaa8e143bfe14c0c08512d3eb94ef38285251045 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Fri, 2 Feb 2024 19:53:13 -0800 Subject: [PATCH 1033/1418] RDMA/device: Fix a race between mad_client and cm_client init [ Upstream commit 7a8bccd8b29c321ac181369b42b04fecf05f98e2 ] The mad_client will be initialized in enable_device_and_get(), while the devices_rwsem will be downgraded to a read semaphore. There is a window that leads to the failed initialization for cm_client, since it can not get matched mad port from ib_mad_port_list, and the matched mad port will be added to the list after that. mad_client | cm_client ------------------|-------------------------------------------------------- ib_register_device| enable_device_and_get down_write(&devices_rwsem) xa_set_mark(&devices, DEVICE_REGISTERED) downgrade_write(&devices_rwsem) | |ib_cm_init |ib_register_client(&cm_client) |down_read(&devices_rwsem) |xa_for_each_marked (&devices, DEVICE_REGISTERED) |add_client_context |cm_add_one |ib_register_mad_agent |ib_get_mad_port |__ib_get_mad_port |list_for_each_entry(entry, &ib_mad_port_list, port_list) |return NULL |up_read(&devices_rwsem) | add_client_context| ib_mad_init_device| ib_mad_port_open | list_add_tail(&port_priv->port_list, &ib_mad_port_list) up_read(&devices_rwsem) | Fix it by using down_write(&devices_rwsem) in ib_register_client(). Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration") Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@sangfor.com.cn Suggested-by: Jason Gunthorpe Signed-off-by: Shifeng Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/device.c | 37 +++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3a9b9a28d858f..453188db39d83 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client) { int ret; - down_write(&clients_rwsem); + lockdep_assert_held(&clients_rwsem); /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in @@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client) client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) - goto out; + return ret; highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - -out: - up_write(&clients_rwsem); - return ret; + return 0; } static void remove_client_id(struct ib_client *client) @@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client) { struct ib_device *device; unsigned long index; + bool need_unreg = false; int ret; refcount_set(&client->uses, 1); init_completion(&client->uses_zero); + + /* + * The devices_rwsem is held in write mode to ensure that a racing + * ib_register_device() sees a consisent view of clients and devices. + */ + down_write(&devices_rwsem); + down_write(&clients_rwsem); ret = assign_client_id(client); if (ret) - return ret; + goto out; - down_read(&devices_rwsem); + need_unreg = true; xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { ret = add_client_context(device, client); - if (ret) { - up_read(&devices_rwsem); - ib_unregister_client(client); - return ret; - } + if (ret) + goto out; } - up_read(&devices_rwsem); - return 0; + ret = 0; +out: + up_write(&clients_rwsem); + up_write(&devices_rwsem); + if (need_unreg && ret) + ib_unregister_client(client); + return ret; } EXPORT_SYMBOL(ib_register_client); -- GitLab From b0455371cced8c1e6d26767cb0f013c1b219a076 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 21 Feb 2024 11:32:04 +0000 Subject: [PATCH 1034/1418] RDMA/rtrs-clt: Check strnlen return len in sysfs mpath_policy_store() [ Upstream commit 7a7b7f575a25aa68ee934ee8107294487efcb3fe ] strnlen() may return 0 (e.g. for "\0\n" string), it's better to check the result of strnlen() before using 'len - 1' expression for the 'buf' array index. Detected using the static analysis tool - Svace. Fixes: dc3b66a0ce70 ("RDMA/rtrs-clt: Add a minimum latency multipath policy") Signed-off-by: Alexey Kodanev Link: https://lore.kernel.org/r/20240221113204.147478-1-aleksei.kodanev@bell-sw.com Acked-by: Jack Wang Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index d3c436ead6946..4aa80c9388f05 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -133,7 +133,7 @@ static ssize_t mpath_policy_store(struct device *dev, /* distinguish "mi" and "min-latency" with length */ len = strnlen(buf, NAME_MAX); - if (buf[len - 1] == '\n') + if (len && buf[len - 1] == '\n') len--; if (!strncasecmp(buf, "round-robin", 11) || -- GitLab From 322eb43c0ea211713ecc40c12bddc0627d566ad7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Feb 2024 13:44:06 +0100 Subject: [PATCH 1035/1418] scsi: bfa: Fix function pointer type mismatch for hcb_qe->cbfn [ Upstream commit b69600231f751304db914c63b937f7098ed2895c ] Some callback functions used here take a boolean argument, others take a status argument. This breaks KCFI type checking, so clang now warns about the function pointer cast: drivers/scsi/bfa/bfad_bsg.c:2138:29: error: cast from 'void (*)(void *, enum bfa_status)' to 'bfa_cb_cbfn_t' (aka 'void (*)(void *, enum bfa_boolean)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] Assuming the code is actually correct here and the callers always match the argument types of the callee, rework this to replace the explicit cast with a union of the two pointer types. This does not change the behavior of the code, so if something is actually broken here, a larger rework may be necessary. Fixes: 37ea0558b87a ("[SCSI] bfa: Added support to collect and reset fcport stats") Fixes: 3ec4f2c8bff2 ("[SCSI] bfa: Added support to configure QOS and collect stats.") Reviewed-by: Kees Cook Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240222124433.2046570-1-arnd@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/bfa/bfa.h | 9 ++++++++- drivers/scsi/bfa/bfa_core.c | 4 +--- drivers/scsi/bfa/bfa_ioc.h | 8 ++++++-- drivers/scsi/bfa/bfad_bsg.c | 11 ++++------- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/bfa/bfa.h b/drivers/scsi/bfa/bfa.h index 7bd2ba1ad4d11..f30fe324e6ecc 100644 --- a/drivers/scsi/bfa/bfa.h +++ b/drivers/scsi/bfa/bfa.h @@ -20,7 +20,6 @@ struct bfa_s; typedef void (*bfa_isr_func_t) (struct bfa_s *bfa, struct bfi_msg_s *m); -typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status); /* * Interrupt message handlers @@ -437,4 +436,12 @@ struct bfa_cb_pending_q_s { (__qe)->data = (__data); \ } while (0) +#define bfa_pending_q_init_status(__qe, __cbfn, __cbarg, __data) do { \ + bfa_q_qe_init(&((__qe)->hcb_qe.qe)); \ + (__qe)->hcb_qe.cbfn_status = (__cbfn); \ + (__qe)->hcb_qe.cbarg = (__cbarg); \ + (__qe)->hcb_qe.pre_rmv = BFA_TRUE; \ + (__qe)->data = (__data); \ +} while (0) + #endif /* __BFA_H__ */ diff --git a/drivers/scsi/bfa/bfa_core.c b/drivers/scsi/bfa/bfa_core.c index 6846ca8f7313c..3438d0b8ba062 100644 --- a/drivers/scsi/bfa/bfa_core.c +++ b/drivers/scsi/bfa/bfa_core.c @@ -1907,15 +1907,13 @@ bfa_comp_process(struct bfa_s *bfa, struct list_head *comp_q) struct list_head *qe; struct list_head *qen; struct bfa_cb_qe_s *hcb_qe; - bfa_cb_cbfn_status_t cbfn; list_for_each_safe(qe, qen, comp_q) { hcb_qe = (struct bfa_cb_qe_s *) qe; if (hcb_qe->pre_rmv) { /* qe is invalid after return, dequeue before cbfn() */ list_del(qe); - cbfn = (bfa_cb_cbfn_status_t)(hcb_qe->cbfn); - cbfn(hcb_qe->cbarg, hcb_qe->fw_status); + hcb_qe->cbfn_status(hcb_qe->cbarg, hcb_qe->fw_status); } else hcb_qe->cbfn(hcb_qe->cbarg, BFA_TRUE); } diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 933a1c3890ff5..5e568d6d7b261 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -361,14 +361,18 @@ struct bfa_reqq_wait_s { void *cbarg; }; -typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete); +typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete); +typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status); /* * Generic BFA callback element. */ struct bfa_cb_qe_s { struct list_head qe; - bfa_cb_cbfn_t cbfn; + union { + bfa_cb_cbfn_status_t cbfn_status; + bfa_cb_cbfn_t cbfn; + }; bfa_boolean_t once; bfa_boolean_t pre_rmv; /* set for stack based qe(s) */ bfa_status_t fw_status; /* to access fw status in comp proc */ diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index be8dfbe13e904..524e4e6979c9f 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -2135,8 +2135,7 @@ bfad_iocmd_fcport_get_stats(struct bfad_s *bfad, void *cmd) struct bfa_cb_pending_q_s cb_qe; init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, &iocmd->stats); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats); spin_lock_irqsave(&bfad->bfad_lock, flags); iocmd->status = bfa_fcport_get_stats(&bfad->bfa, &cb_qe); spin_unlock_irqrestore(&bfad->bfad_lock, flags); @@ -2159,7 +2158,7 @@ bfad_iocmd_fcport_reset_stats(struct bfad_s *bfad, void *cmd) struct bfa_cb_pending_q_s cb_qe; init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, &fcomp, NULL); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL); spin_lock_irqsave(&bfad->bfad_lock, flags); iocmd->status = bfa_fcport_clear_stats(&bfad->bfa, &cb_qe); @@ -2443,8 +2442,7 @@ bfad_iocmd_qos_get_stats(struct bfad_s *bfad, void *cmd) struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa); init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, &iocmd->stats); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats); spin_lock_irqsave(&bfad->bfad_lock, flags); WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc)); @@ -2474,8 +2472,7 @@ bfad_iocmd_qos_reset_stats(struct bfad_s *bfad, void *cmd) struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa); init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, NULL); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL); spin_lock_irqsave(&bfad->bfad_lock, flags); WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc)); -- GitLab From 8ede73123cd594a3eb9595e325aa2b01d0fb06ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Feb 2024 00:08:18 +0800 Subject: [PATCH 1036/1418] f2fs: compress: fix to check zstd compress level correctly in mount option [ Upstream commit e39602da752cd1d0462e3fa04074146f6f2803f6 ] f2fs only support to config zstd compress level w/ a positive number due to layout design, but since commit e0c1b49f5b67 ("lib: zstd: Upgrade to latest upstream zstd version 1.4.10"), zstd supports negative compress level, so that zstd_min_clevel() may return a negative number, then w/ below mount option, .compress_level can be configed w/ a negative number, which is not allowed to f2fs, let's add check condition to avoid it. mount -o compress_algorithm=zstd:4294967295 /dev/sdx /mnt/f2fs Fixes: 00e120b5e4b5 ("f2fs: assign default compression level") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0c0d0671febea..c529ce5d986cc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -649,7 +649,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) #ifdef CONFIG_F2FS_FS_ZSTD static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) { - unsigned int level; + int level; int len = 4; if (strlen(str) == len) { @@ -663,9 +663,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) f2fs_info(sbi, "wrong format, e.g. :"); return -EINVAL; } - if (kstrtouint(str + 1, 10, &level)) + if (kstrtoint(str + 1, 10, &level)) return -EINVAL; + /* f2fs does not support negative compress level now */ + if (level < 0) { + f2fs_info(sbi, "do not support negative compress level: %d", level); + return -ERANGE; + } + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; -- GitLab From 8c0fe010cbe0ef6cb78c612cde9651dce3ad1b9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 24 Oct 2023 23:58:20 +0200 Subject: [PATCH 1037/1418] net: sunrpc: Fix an off by one in rpc_sockaddr2uaddr() [ Upstream commit d6f4de70f73a106986ee315d7d512539f2f3303a ] The intent is to check if the strings' are truncated or not. So, >= should be used instead of >, because strlcat() and snprintf() return the length of the output, excluding the trailing NULL. Fixes: a02d69261134 ("SUNRPC: Provide functions for managing universal addresses") Signed-off-by: Christophe JAILLET Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/addr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d435bffc61999..97ff11973c493 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -284,10 +284,10 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) } if (snprintf(portbuf, sizeof(portbuf), - ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + ".%u.%u", port >> 8, port & 0xff) >= (int)sizeof(portbuf)) return NULL; - if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >= sizeof(addrbuf)) return NULL; return kstrdup(addrbuf, gfp_flags); -- GitLab From 06e828b3f1b206de08ef520fc46a40b22e1869cb Mon Sep 17 00:00:00 2001 From: Jorge Mora Date: Thu, 25 Jan 2024 07:56:12 -0700 Subject: [PATCH 1038/1418] NFSv4.2: fix nfs4_listxattr kernel BUG at mm/usercopy.c:102 [ Upstream commit 251a658bbfceafb4d58c76b77682c8bf7bcfad65 ] A call to listxattr() with a buffer size = 0 returns the actual size of the buffer needed for a subsequent call. When size > 0, nfs4_listxattr() does not return an error because either generic_listxattr() or nfs4_listxattr_nfs4_label() consumes exactly all the bytes then size is 0 when calling nfs4_listxattr_nfs4_user() which then triggers the following kernel BUG: [ 99.403778] kernel BUG at mm/usercopy.c:102! [ 99.404063] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP [ 99.408463] CPU: 0 PID: 3310 Comm: python3 Not tainted 6.6.0-61.fc40.aarch64 #1 [ 99.415827] Call trace: [ 99.415985] usercopy_abort+0x70/0xa0 [ 99.416227] __check_heap_object+0x134/0x158 [ 99.416505] check_heap_object+0x150/0x188 [ 99.416696] __check_object_size.part.0+0x78/0x168 [ 99.416886] __check_object_size+0x28/0x40 [ 99.417078] listxattr+0x8c/0x120 [ 99.417252] path_listxattr+0x78/0xe0 [ 99.417476] __arm64_sys_listxattr+0x28/0x40 [ 99.417723] invoke_syscall+0x78/0x100 [ 99.417929] el0_svc_common.constprop.0+0x48/0xf0 [ 99.418186] do_el0_svc+0x24/0x38 [ 99.418376] el0_svc+0x3c/0x110 [ 99.418554] el0t_64_sync_handler+0x120/0x130 [ 99.418788] el0t_64_sync+0x194/0x198 [ 99.418994] Code: aa0003e3 d000a3e0 91310000 97f49bdb (d4210000) Issue is reproduced when generic_listxattr() returns 'system.nfs4_acl', thus calling lisxattr() with size = 16 will trigger the bug. Add check on nfs4_listxattr() to return ERANGE error when it is called with size > 0 and the return value is greater than size. Fixes: 012a211abd5d ("NFSv4.2: hook in the user extended attribute handlers") Signed-off-by: Jorge Mora Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ec3f0103e1a7f..7cc74f7451d67 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10592,29 +10592,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { ssize_t error, error2, error3; + size_t left = size; - error = generic_listxattr(dentry, list, size); + error = generic_listxattr(dentry, list, left); if (error < 0) return error; if (list) { list += error; - size -= error; + left -= error; } - error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); + error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left); if (error2 < 0) return error2; if (list) { list += error2; - size -= error2; + left -= error2; } - error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left); if (error3 < 0) return error3; - return error + error2 + error3; + error += error2 + error3; + if (size && error > size) + return -ERANGE; + return error; } static void nfs4_enable_swap(struct inode *inode) -- GitLab From 18631d43d487233237dc41e765d067d2915fae8b Mon Sep 17 00:00:00 2001 From: Jorge Mora Date: Thu, 25 Jan 2024 07:51:28 -0700 Subject: [PATCH 1039/1418] NFSv4.2: fix listxattr maximum XDR buffer size [ Upstream commit bcac8bff90a6ee1629f90669cdb9d28fb86049b0 ] Switch order of operations to avoid creating a short XDR buffer: e.g., buflen = 12, old xdrlen = 12, new xdrlen = 20. Having a short XDR buffer leads to lxa_maxcount be a few bytes less than what is needed to retrieve the whole list when using a buflen as returned by a call with size = 0: buflen = listxattr(path, NULL, 0); buf = malloc(buflen); buflen = listxattr(path, buf, buflen); For a file with one attribute (name = '123456'), the first call with size = 0 will return buflen = 12 ('user.123456\x00'). The second call with size = 12, sends LISTXATTRS with lxa_maxcount = 12 + 8 (cookie) + 4 (array count) = 24. The XDR buffer needs 8 (cookie) + 4 (array count) + 4 (name count) + 6 (name len) + 2 (padding) + 4 (eof) = 28 which is 4 bytes shorter than the lxa_maxcount provided in the call. Fixes: 04a5da690e8f ("NFSv4.2: define limits and sizes for user xattr handling") Signed-off-by: Jorge Mora Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs42.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index b59876b01a1e3..0282d93c8bccb 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -55,11 +55,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name); * They would be 7 bytes long in the eventual buffer ("user.x\0"), and * 8 bytes long XDR-encoded. * - * Include the trailing eof word as well. + * Include the trailing eof word as well and make the result a multiple + * of 4 bytes. */ static inline u32 nfs42_listxattr_xdrsize(u32 buflen) { - return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4; + u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4; + + return (size + 3) & ~3; } #endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ -- GitLab From 7486973accbc4a9046cdbcdf94028a9a7b67c890 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 19 Feb 2024 10:28:44 +0800 Subject: [PATCH 1040/1418] f2fs: compress: fix to check compress flag w/ .i_sem lock [ Upstream commit ea59b12ac69774c08aa95cd5b6100700ea0cce97 ] It needs to check compress flag w/ .i_sem lock, otherwise, compressed inode may be disabled after the check condition, it's not needed to set compress option on non-compress inode. Fixes: e1e8debec656 ("f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 96b59c87f30c7..be4cab941d299 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3967,16 +3967,20 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) sizeof(option))) return -EFAULT; - if (!f2fs_compressed_file(inode) || - option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || - option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || - option.algorithm >= COMPRESS_MAX) + if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) return -EINVAL; file_start_write(filp); inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; -- GitLab From b91d54a2c4aa7a9028499d4bf869215e10097e19 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Feb 2024 12:32:05 -0800 Subject: [PATCH 1041/1418] f2fs: check number of blocks in a current section [ Upstream commit 7af2df0f67a1469762e59be3726a803882d83f6f ] In cfd66bb715fd ("f2fs: fix deadloop in foreground GC"), we needed to check the number of blocks in a section instead of the segment. Fixes: cfd66bb715fd ("f2fs: fix deadloop in foreground GC") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f3951e8ad3948..aa9ad85e0901d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -586,23 +586,22 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, unsigned int node_blocks, unsigned int dent_blocks) { - unsigned int segno, left_blocks; + unsigned segno, left_blocks; int i; - /* check current node segment */ + /* check current node sections in the worst case. */ for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; - + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (node_blocks > left_blocks) return false; } - /* check current data segment */ + /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (dent_blocks > left_blocks) return false; return true; @@ -651,7 +650,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (free_secs > upper_secs) return false; - else if (free_secs <= lower_secs) + if (free_secs <= lower_secs) return true; return !curseg_space; } -- GitLab From aa25e54f44cb6969f4f39d2e1b5999bfc354f38e Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Wed, 28 Feb 2024 13:27:23 -0500 Subject: [PATCH 1042/1418] watchdog: stm32_iwdg: initialize default timeout [ Upstream commit dbd7c0088b7f44aa0b9276ed3449df075a7b5b54 ] The driver never sets a default timeout value, therefore it is initialized to zero. When CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is enabled, the watchdog is started during probe. The kernel is supposed to automatically ping the watchdog from this point until userspace takes over, but this does not happen if the configured timeout is zero. A zero timeout causes watchdog_need_worker() to return false, so the heartbeat worker does not run and the system therefore resets soon after the driver is probed. This patch fixes this by setting an arbitrary non-zero default timeout. The default could be read from the hardware instead, but I didn't see any reason to add this complexity. This has been tested on an STM32F746. Fixes: 85fdc63fe256 ("drivers: watchdog: stm32_iwdg: set WDOG_HW_RUNNING at probe") Signed-off-by: Ben Wolsieffer Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240228182723.12855-1-ben.wolsieffer@hefring.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/stm32_iwdg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index 570a71509d2a9..78d51deab87aa 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -21,6 +21,8 @@ #include #include +#define DEFAULT_TIMEOUT 10 + /* IWDG registers */ #define IWDG_KR 0x00 /* Key register */ #define IWDG_PR 0x04 /* Prescaler Register */ @@ -249,6 +251,7 @@ static int stm32_iwdg_probe(struct platform_device *pdev) wdd->parent = dev; wdd->info = &stm32_iwdg_info; wdd->ops = &stm32_iwdg_ops; + wdd->timeout = DEFAULT_TIMEOUT; wdd->min_timeout = DIV_ROUND_UP((RLR_MIN + 1) * PR_MIN, wdt->rate); wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * wdt->data->max_prescaler * 1000) / wdt->rate; -- GitLab From 449684e376a051ca15502d6ebc8f25932927dc6b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 15:35:38 +0800 Subject: [PATCH 1043/1418] f2fs: ro: compress: fix to avoid caching unaligned extent [ Upstream commit 4b99ecd304290c4ef55666a62c89dfb2dbf0b2cd ] Mapping info from dump.f2fs: i_addr[0x2d] cluster flag [0xfffffffe : 4294967294] i_addr[0x2e] [0x 10428 : 66600] i_addr[0x2f] [0x 10429 : 66601] i_addr[0x30] [0x 1042a : 66602] f2fs_io fiemap 37 1 /mnt/f2fs/disk-58390c8c.raw Previsouly, it missed to align fofs and ofs_in_node to cluster_size, result in adding incorrect read extent cache, fix it. Before: f2fs_update_read_extent_tree_range: dev = (253,48), ino = 5, pgofs = 37, len = 4, blkaddr = 66600, c_len = 3 After: f2fs_update_read_extent_tree_range: dev = (253,48), ino = 5, pgofs = 36, len = 4, blkaddr = 66600, c_len = 3 Fixes: 94afd6d6e525 ("f2fs: extent cache: support unaligned extent") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 10 ++++++---- fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/node.c | 20 ++++++++++++++------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5973fda2349c7..df6dfd7de6d0d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1843,16 +1843,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task) * check whether cluster blocks are contiguous, and add extent cache entry * only if cluster blocks are logically and physically contiguous. */ -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node) { - bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + bool compressed = data_blkaddr(dn->inode, dn->node_page, + ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) break; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f5d69893d2d92..5ae1c4aa3ae92 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4258,7 +4258,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); void f2fs_put_page_dic(struct page *page, bool in_task); -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4315,7 +4316,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } -static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5db6740d31364..fcf22a50ff5db 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -850,21 +850,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && f2fs_sb_has_readonly(sbi)) { - unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + unsigned int ofs_in_node = dn->ofs_in_node; + pgoff_t fofs = index; + unsigned int c_len; block_t blkaddr; + /* should align fofs and ofs_in_node to cluster_size */ + if (fofs % cluster_size) { + fofs = round_down(fofs, cluster_size); + ofs_in_node = round_down(ofs_in_node, cluster_size); + } + + c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); if (!c_len) goto out; - blkaddr = f2fs_data_blkaddr(dn); + blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node); if (blkaddr == COMPRESS_ADDR) blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + 1); + ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, - index, blkaddr, - F2FS_I(dn->inode)->i_cluster_size, - c_len); + fofs, blkaddr, cluster_size, c_len); } out: return 0; -- GitLab From 4d5e5a044b99c2c4e4b99262189f35efc862be3e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 Feb 2024 22:16:53 +0100 Subject: [PATCH 1044/1418] NFS: Fix an off by one in root_nfs_cat() [ Upstream commit 698ad1a538da0b6bf969cfee630b4e3a026afb87 ] The intent is to check if 'dest' is truncated or not. So, >= should be used instead of >, because strlcat() returns the length of 'dest' and 'src' excluding the trailing NULL. Fixes: 56463e50d1fc ("NFS: Use super.c for NFSROOT mount option parsing") Signed-off-by: Christophe JAILLET Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfsroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 620329b7e6aeb..0b1c1d2e076c1 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src, size_t len = strlen(dest); if (len && dest[len - 1] != ',') - if (strlcat(dest, ",", destlen) > destlen) + if (strlcat(dest, ",", destlen) >= destlen) return -1; - if (strlcat(dest, src, destlen) > destlen) + if (strlcat(dest, src, destlen) >= destlen) return -1; return 0; } -- GitLab From 02b661956605839dd2d909fa08119288bc98b0ce Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 13 Jun 2023 15:51:57 +0800 Subject: [PATCH 1045/1418] f2fs: convert to use sbi directly [ Upstream commit c3355ea9d82fe6b1a4226c9a7d311f9c5715b456 ] F2FS_I_SB(inode) is redundant. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: b7d797d241c1 ("f2fs: compress: relocate some judgments in f2fs_reserve_compress_blocks") Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index be4cab941d299..c787a3f408ab3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3462,7 +3462,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) int ret; int writecount; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3475,7 +3475,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3644,7 +3644,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) unsigned int reserved_blocks = 0; int ret; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3660,7 +3660,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -4070,7 +4070,7 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); @@ -4142,7 +4142,7 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); -- GitLab From a4e063d67e9341fbc08990d3fef8f6f4c1206326 Mon Sep 17 00:00:00 2001 From: Xiuhong Wang Date: Wed, 6 Mar 2024 11:47:45 +0800 Subject: [PATCH 1046/1418] f2fs: compress: relocate some judgments in f2fs_reserve_compress_blocks [ Upstream commit b7d797d241c154d73ec5523f87f3b06d4f299da1 ] The following f2fs_io test will get a "0" result instead of -EINVAL, unisoc # ./f2fs_io compress file unisoc # ./f2fs_io reserve_cblocks file 0 it's not reasonable, so the judgement of atomic_read(&F2FS_I(inode)->i_compr_blocks) should be placed after the judgement of is_inode_flag_set(inode, FI_COMPRESS_RELEASED). Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Signed-off-by: Xiuhong Wang Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c787a3f408ab3..5dbd874ba3d8d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3657,9 +3657,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3669,6 +3666,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } + if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) + goto unlock_inode; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -3715,7 +3715,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } unlock_inode: inode_unlock(inode); -out: mnt_drop_write_file(filp); if (ret >= 0) { -- GitLab From 889846dfc8ee2cf31148a44bfd2faeb2faadc685 Mon Sep 17 00:00:00 2001 From: Xiuhong Wang Date: Wed, 6 Mar 2024 11:47:46 +0800 Subject: [PATCH 1047/1418] f2fs: compress: fix reserve_cblocks counting error when out of space [ Upstream commit 2f6d721e14b69d6e1251f69fa238b48e8374e25f ] When a file only needs one direct_node, performing the following operations will cause the file to be unrepairable: unisoc # ./f2fs_io compress test.apk unisoc #df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.2M 100% /data unisoc # ./f2fs_io release_cblocks test.apk 924 unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 4.8M 100% /data unisoc # dd if=/dev/random of=file4 bs=1M count=3 3145728 bytes (3.0 M) copied, 0.025 s, 120 M/s unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.8M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk F2FS_IOC_RESERVE_COMPRESS_BLOCKS failed: No space left on device adb reboot unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 11M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk 0 This is because the file has only one direct_node. After returning to -ENOSPC, reserved_blocks += ret will not be executed. As a result, the reserved_blocks at this time is still 0, which is not the real number of reserved blocks. Therefore, fsck cannot be set to repair the file. After this patch, the fsck flag will be set to fix this problem. unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.8M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk F2FS_IOC_RESERVE_COMPRESS_BLOCKS failed: No space left on device adb reboot then fsck will be executed unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 11M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk 924 Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Signed-off-by: Xiuhong Wang Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5dbd874ba3d8d..2fbc8d89c600b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3561,10 +3561,10 @@ out: return ret; } -static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, + unsigned int *reserved_blocks) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - unsigned int reserved_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; block_t blkaddr; int i; @@ -3628,12 +3628,12 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - reserved_blocks += reserved; + *reserved_blocks += reserved; next: count -= cluster_size; } - return reserved_blocks; + return 0; } static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) @@ -3694,7 +3694,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, F2FS_I(inode)->i_cluster_size); - ret = reserve_compress_blocks(&dn, count); + ret = reserve_compress_blocks(&dn, count, &reserved_blocks); f2fs_put_dnode(&dn); @@ -3702,13 +3702,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; page_idx += count; - reserved_blocks += ret; } filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (ret >= 0) { + if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); @@ -3717,7 +3716,7 @@ unlock_inode: inode_unlock(inode); mnt_drop_write_file(filp); - if (ret >= 0) { + if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && atomic_read(&F2FS_I(inode)->i_compr_blocks)) { -- GitLab From 4fd4a210de9dc4fcaafcc571893d41039875a699 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 29 Jan 2024 16:36:26 +0530 Subject: [PATCH 1048/1418] perf/x86/amd/core: Avoid register reset when CPU is dead [ Upstream commit ad8c91282c95f801c37812d59d2d9eba6899b384 ] When bringing a CPU online, some of the PMC and LBR related registers are reset. The same is done when a CPU is taken offline although that is unnecessary. This currently happens in the "cpu_dead" callback which is also incorrect as the callback runs on a control CPU instead of the one that is being taken offline. This also affects hibernation and suspend to RAM on some platforms as reported in the link below. Fixes: 21d59e3e2c40 ("perf/x86/amd/core: Detect PerfMonV2 support") Reported-by: Mario Limonciello Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/550a026764342cf7e5812680e3e2b91fe662b5ac.1706526029.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 04f4b96dec6df..fd091b9dd7067 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,6 @@ static void amd_pmu_cpu_dead(int cpu) kfree(cpuhw->lbr_sel); cpuhw->lbr_sel = NULL; - amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; -- GitLab From 76426abf9b980b46983f97de8e5b25047b4c9863 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 Mar 2024 11:08:41 +0000 Subject: [PATCH 1049/1418] afs: Revert "afs: Hide silly-rename files from userspace" [ Upstream commit 0aec3847d044273733285dcff90afda89ad461d2 ] This reverts commit 57e9d49c54528c49b8bffe6d99d782ea051ea534. This undoes the hiding of .__afsXXXX silly-rename files. The problem with hiding them is that rm can't then manually delete them. This also reverts commit 5f7a07646655fb4108da527565dcdc80124b14c4 ("afs: Fix endless loop in directory parsing") as that's a bugfix for the above. Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace") Reported-by: Markus Suvanto Link: https://lists.infradead.org/pipermail/linux-afs/2024-February/008102.html Signed-off-by: David Howells Link: https://lore.kernel.org/r/3085695.1710328121@warthog.procyon.org.uk Reviewed-by: Jeffrey E Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/dir.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 6e2c967fae6fc..07dc4ec73520c 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -473,16 +473,6 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, continue; } - /* Don't expose silly rename entries to userspace. */ - if (nlen > 6 && - dire->u.name[0] == '.' && - ctx->actor != afs_lookup_filldir && - ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) { - ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); - continue; - } - /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), -- GitLab From 31db25e3141b20e2a76a9f219eeca52e3cab126c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Mar 2024 11:11:53 -0400 Subject: [PATCH 1050/1418] nfs: fix panic when nfs4_ff_layout_prepare_ds() fails [ Upstream commit 719fcafe07c12646691bd62d7f8d94d657fa0766 ] We've been seeing the following panic in production BUG: kernel NULL pointer dereference, address: 0000000000000065 PGD 2f485f067 P4D 2f485f067 PUD 2cc5d8067 PMD 0 RIP: 0010:ff_layout_cancel_io+0x3a/0x90 [nfs_layout_flexfiles] Call Trace: ? __die+0x78/0xc0 ? page_fault_oops+0x286/0x380 ? __rpc_execute+0x2c3/0x470 [sunrpc] ? rpc_new_task+0x42/0x1c0 [sunrpc] ? exc_page_fault+0x5d/0x110 ? asm_exc_page_fault+0x22/0x30 ? ff_layout_free_layoutreturn+0x110/0x110 [nfs_layout_flexfiles] ? ff_layout_cancel_io+0x3a/0x90 [nfs_layout_flexfiles] ? ff_layout_cancel_io+0x6f/0x90 [nfs_layout_flexfiles] pnfs_mark_matching_lsegs_return+0x1b0/0x360 [nfsv4] pnfs_error_mark_layout_for_return+0x9e/0x110 [nfsv4] ? ff_layout_send_layouterror+0x50/0x160 [nfs_layout_flexfiles] nfs4_ff_layout_prepare_ds+0x11f/0x290 [nfs_layout_flexfiles] ff_layout_pg_init_write+0xf0/0x1f0 [nfs_layout_flexfiles] __nfs_pageio_add_request+0x154/0x6c0 [nfs] nfs_pageio_add_request+0x26b/0x380 [nfs] nfs_do_writepage+0x111/0x1e0 [nfs] nfs_writepages_callback+0xf/0x30 [nfs] write_cache_pages+0x17f/0x380 ? nfs_pageio_init_write+0x50/0x50 [nfs] ? nfs_writepages+0x6d/0x210 [nfs] ? nfs_writepages+0x6d/0x210 [nfs] nfs_writepages+0x125/0x210 [nfs] do_writepages+0x67/0x220 ? generic_perform_write+0x14b/0x210 filemap_fdatawrite_wbc+0x5b/0x80 file_write_and_wait_range+0x6d/0xc0 nfs_file_fsync+0x81/0x170 [nfs] ? nfs_file_mmap+0x60/0x60 [nfs] __x64_sys_fsync+0x53/0x90 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Inspecting the core with drgn I was able to pull this >>> prog.crashed_thread().stack_trace()[0] #0 at 0xffffffffa079657a (ff_layout_cancel_io+0x3a/0x84) in ff_layout_cancel_io at fs/nfs/flexfilelayout/flexfilelayout.c:2021:27 >>> prog.crashed_thread().stack_trace()[0]['idx'] (u32)1 >>> prog.crashed_thread().stack_trace()[0]['flseg'].mirror_array[1].mirror_ds (struct nfs4_ff_layout_ds *)0xffffffffffffffed This is clear from the stack trace, we call nfs4_ff_layout_prepare_ds() which could error out initializing the mirror_ds, and then we go to clean it all up and our check is only for if (!mirror->mirror_ds). This is inconsistent with the rest of the users of mirror_ds, which have if (IS_ERR_OR_NULL(mirror_ds)) to keep from tripping over this exact scenario. Fix this up in ff_layout_cancel_io() to make sure we don't panic when we get an error. I also spot checked all the other instances of checking mirror_ds and we appear to be doing the correct checks everywhere, only unconditionally dereferencing mirror_ds when we know it would be valid. Signed-off-by: Josef Bacik Fixes: b739a5bd9d9f ("NFSv4/flexfiles: Cancel I/O if the layout is recalled or revoked") Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 81bbafab18a99..4376881be7918 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2016,7 +2016,7 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg) for (idx = 0; idx < flseg->mirror_array_cnt; idx++) { mirror = flseg->mirror_array[idx]; mirror_ds = mirror->mirror_ds; - if (!mirror_ds) + if (IS_ERR_OR_NULL(mirror_ds)) continue; ds = mirror->mirror_ds->ds; if (!ds) -- GitLab From b5a579ddf0bf52c0fe17ec85ea92644501125aba Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 1 Mar 2024 19:43:48 +0500 Subject: [PATCH 1051/1418] io_uring/net: correct the type of variable [ Upstream commit 86bcacc957fc2d0403aa0e652757eec59a5fd7ca ] The namelen is of type int. It shouldn't be made size_t which is unsigned. The signed number is needed for error checking before use. Fixes: c55978024d12 ("io_uring/net: move receive multishot out of the generic msghdr path") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240301144349.2807544-1-usama.anjum@collabora.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 9fc0ffb0b6c12..0d4ee3d738fbf 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -516,7 +516,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) static int io_recvmsg_mshot_prep(struct io_kiocb *req, struct io_async_msghdr *iomsg, - size_t namelen, size_t controllen) + int namelen, size_t controllen) { if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { -- GitLab From 5c594bdbd5199f81c9064d53ebb82ac081c449ea Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 14 Feb 2024 10:07:25 +0000 Subject: [PATCH 1052/1418] comedi: comedi_test: Prevent timers rescheduling during deletion commit f53641a6e849034a44bf80f50245a75d7a376025 upstream. The comedi_test devices have a couple of timers (ai_timer and ao_timer) that can be started to simulate hardware interrupts. Their expiry functions normally reschedule the timer. The driver code calls either del_timer_sync() or del_timer() to delete the timers from the queue, but does not currently prevent the timers from rescheduling themselves so synchronized deletion may be ineffective. Add a couple of boolean members (one for each timer: ai_timer_enable and ao_timer_enable) to the device private data structure to indicate whether the timers are allowed to reschedule themselves. Set the member to true when adding the timer to the queue, and to false when deleting the timer from the queue in the waveform_ai_cancel() and waveform_ao_cancel() functions. The del_timer_sync() function is also called from the waveform_detach() function, but the timer enable members will already be set to false when that function is called, so no change is needed there. Fixes: 403fe7f34e33 ("staging: comedi: comedi_test: fix timer race conditions") Cc: stable@vger.kernel.org # 4.4+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20240214100747.16203-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_test.c | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 0b5c0af1cebf0..626d53bf9146a 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -85,6 +85,8 @@ struct waveform_private { struct comedi_device *dev; /* parent comedi device */ u64 ao_last_scan_time; /* time of previous AO scan in usec */ unsigned int ao_scan_period; /* AO scan period in usec */ + bool ai_timer_enable:1; /* should AI timer be running? */ + bool ao_timer_enable:1; /* should AO timer be running? */ unsigned short ao_loopbacks[N_CHANS]; }; @@ -234,8 +236,12 @@ static void waveform_ai_timer(struct timer_list *t) time_increment = devpriv->ai_convert_time - now; else time_increment = 1; - mod_timer(&devpriv->ai_timer, - jiffies + usecs_to_jiffies(time_increment)); + spin_lock(&dev->spinlock); + if (devpriv->ai_timer_enable) { + mod_timer(&devpriv->ai_timer, + jiffies + usecs_to_jiffies(time_increment)); + } + spin_unlock(&dev->spinlock); } overrun: @@ -391,9 +397,12 @@ static int waveform_ai_cmd(struct comedi_device *dev, * Seem to need an extra jiffy here, otherwise timer expires slightly * early! */ + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = true; devpriv->ai_timer.expires = jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1; add_timer(&devpriv->ai_timer); + spin_unlock_bh(&dev->spinlock); return 0; } @@ -402,6 +411,9 @@ static int waveform_ai_cancel(struct comedi_device *dev, { struct waveform_private *devpriv = dev->private; + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ai_timer); @@ -493,8 +505,12 @@ static void waveform_ao_timer(struct timer_list *t) unsigned int time_inc = devpriv->ao_last_scan_time + devpriv->ao_scan_period - now; - mod_timer(&devpriv->ao_timer, - jiffies + usecs_to_jiffies(time_inc)); + spin_lock(&dev->spinlock); + if (devpriv->ao_timer_enable) { + mod_timer(&devpriv->ao_timer, + jiffies + usecs_to_jiffies(time_inc)); + } + spin_unlock(&dev->spinlock); } underrun: @@ -515,9 +531,12 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev, async->inttrig = NULL; devpriv->ao_last_scan_time = ktime_to_us(ktime_get()); + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = true; devpriv->ao_timer.expires = jiffies + usecs_to_jiffies(devpriv->ao_scan_period); add_timer(&devpriv->ao_timer); + spin_unlock_bh(&dev->spinlock); return 1; } @@ -602,6 +621,9 @@ static int waveform_ao_cancel(struct comedi_device *dev, struct waveform_private *devpriv = dev->private; s->async->inttrig = NULL; + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ao_timer); -- GitLab From 0e3aa17499641077c31cdf10bb1abb05454e5522 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Jun 2023 12:45:42 +0200 Subject: [PATCH 1053/1418] remoteproc: stm32: use correct format strings on 64-bit [ Upstream commit 03bd158e1535e68bcd2b1e095b0ebcad7c84bd20 ] With CONFIG_ARCH_STM32 making it into arch/arm64, a couple of format strings no longer work, since they rely on size_t being compatible with %x, or they print an 'int' using %z: drivers/remoteproc/stm32_rproc.c: In function 'stm32_rproc_mem_alloc': drivers/remoteproc/stm32_rproc.c:122:22: error: format '%x' expects argument of type 'unsigned int', but argument 5 has type 'size_t' {aka 'long unsigned int'} [-Werror=format=] drivers/remoteproc/stm32_rproc.c:122:40: note: format string is defined here 122 | dev_dbg(dev, "map memory: %pa+%x\n", &mem->dma, mem->len); | ~^ | | | unsigned int | %lx drivers/remoteproc/stm32_rproc.c:125:30: error: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'size_t' {aka 'long unsigned int'} [-Werror=format=] drivers/remoteproc/stm32_rproc.c:125:65: note: format string is defined here 125 | dev_err(dev, "Unable to map memory region: %pa+%x\n", | ~^ | | | unsigned int | %lx drivers/remoteproc/stm32_rproc.c: In function 'stm32_rproc_get_loaded_rsc_table': drivers/remoteproc/stm32_rproc.c:646:30: error: format '%zx' expects argument of type 'size_t', but argument 4 has type 'int' [-Werror=format=] drivers/remoteproc/stm32_rproc.c:646:66: note: format string is defined here 646 | dev_err(dev, "Unable to map memory region: %pa+%zx\n", | ~~^ | | | long unsigned int | %x Fix up all three instances to work across architectures, and enable compile testing for this driver to ensure it builds everywhere. Reviewed-by: Arnaud Pouliquen Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Arnd Bergmann Stable-dep-of: 32381bbccba4 ("remoteproc: stm32: Fix incorrect type in assignment for va") Signed-off-by: Sasha Levin --- drivers/remoteproc/Kconfig | 2 +- drivers/remoteproc/stm32_rproc.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 1660197866531..d93113b6ffaa1 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -313,7 +313,7 @@ config ST_SLIM_REMOTEPROC config STM32_RPROC tristate "STM32 remoteproc support" - depends on ARCH_STM32 + depends on ARCH_STM32 || COMPILE_TEST depends on REMOTEPROC select MAILBOX help diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 8746cbb1f168d..e432febf4337b 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -118,10 +118,10 @@ static int stm32_rproc_mem_alloc(struct rproc *rproc, struct device *dev = rproc->dev.parent; void *va; - dev_dbg(dev, "map memory: %pa+%x\n", &mem->dma, mem->len); + dev_dbg(dev, "map memory: %pad+%zx\n", &mem->dma, mem->len); va = ioremap_wc(mem->dma, mem->len); if (IS_ERR_OR_NULL(va)) { - dev_err(dev, "Unable to map memory region: %pa+%x\n", + dev_err(dev, "Unable to map memory region: %pad+0x%zx\n", &mem->dma, mem->len); return -ENOMEM; } @@ -627,7 +627,7 @@ stm32_rproc_get_loaded_rsc_table(struct rproc *rproc, size_t *table_sz) ddata->rsc_va = devm_ioremap_wc(dev, rsc_pa, RSC_TBL_SIZE); if (IS_ERR_OR_NULL(ddata->rsc_va)) { - dev_err(dev, "Unable to map memory region: %pa+%zx\n", + dev_err(dev, "Unable to map memory region: %pa+%x\n", &rsc_pa, RSC_TBL_SIZE); ddata->rsc_va = NULL; return ERR_PTR(-ENOMEM); -- GitLab From b4f425956362fe0a4de7216f34bbfc192a0fdc6a Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 17 Jan 2024 14:53:11 +0100 Subject: [PATCH 1054/1418] remoteproc: stm32: Fix incorrect type in assignment for va [ Upstream commit 32381bbccba4c21145c571701f8f7fb1d9b3a92e ] The sparse tool complains about the attribute conversion between a _iomem void * and a void *: stm32_rproc.c:122:12: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void *va @@ got void [noderef] __iomem * @@ stm32_rproc.c:122:12: sparse: expected void *va stm32_rproc.c:122:12: sparse: got void [noderef] __iomem * Add '__force' to explicitly specify that the cast is intentional. This conversion is necessary to cast to virtual addresses pointer,used, by the remoteproc core. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312150052.HCiNKlqB-lkp@intel.com/ Fixes: 13140de09cc2 ("remoteproc: stm32: add an ST stm32_rproc driver") Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20240117135312.3381936-2-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/stm32_rproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index e432febf4337b..722cf1cdc2cb0 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -119,7 +119,7 @@ static int stm32_rproc_mem_alloc(struct rproc *rproc, void *va; dev_dbg(dev, "map memory: %pad+%zx\n", &mem->dma, mem->len); - va = ioremap_wc(mem->dma, mem->len); + va = (__force void *)ioremap_wc(mem->dma, mem->len); if (IS_ERR_OR_NULL(va)) { dev_err(dev, "Unable to map memory region: %pad+0x%zx\n", &mem->dma, mem->len); @@ -136,7 +136,7 @@ static int stm32_rproc_mem_release(struct rproc *rproc, struct rproc_mem_entry *mem) { dev_dbg(rproc->dev.parent, "unmap memory: %pa\n", &mem->dma); - iounmap(mem->va); + iounmap((__force __iomem void *)mem->va); return 0; } -- GitLab From e660319fc67c50a9310873fde832f1c04b5a7bc9 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 17 Jan 2024 14:53:12 +0100 Subject: [PATCH 1055/1418] remoteproc: stm32: Fix incorrect type assignment returned by stm32_rproc_get_loaded_rsc_tablef [ Upstream commit c77b35ce66af25bdd6fde60b62e35b9b316ea5c2 ] The sparse tool complains about the remove of the _iomem attribute. stm32_rproc.c:660:17: warning: cast removes address space '__iomem' of expression Add '__force' to explicitly specify that the cast is intentional. This conversion is necessary to cast to addresses pointer, which are then managed by the remoteproc core as a pointer to a resource_table structure. Fixes: 8a471396d21c ("remoteproc: stm32: Move resource table setup to rproc_ops") Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20240117135312.3381936-3-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/stm32_rproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 722cf1cdc2cb0..385e931603ad3 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -641,7 +641,7 @@ done: * entire area by overwriting it with the initial values stored in rproc->clean_table. */ *table_sz = RSC_TBL_SIZE; - return (struct resource_table *)ddata->rsc_va; + return (__force struct resource_table *)ddata->rsc_va; } static const struct rproc_ops st_rproc_ops = { -- GitLab From 22ae3d106391b51234167d9eed4dbfc37a366583 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 23 Jan 2024 17:51:09 -0500 Subject: [PATCH 1056/1418] usb: phy: generic: Get the vbus supply [ Upstream commit 75fd6485cccef269ac9eb3b71cf56753341195ef ] While support for working with a vbus was added, the regulator was never actually gotten (despite what was documented). Fix this by actually getting the supply from the device tree. Fixes: 7acc9973e3c4 ("usb: phy: generic: add vbus support") Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240123225111.1629405-3-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/phy/phy-generic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 3dc5c04e7cbf9..953df04b40d40 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -265,6 +265,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } + nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); + if (PTR_ERR(nop->vbus_draw) == -ENODEV) + nop->vbus_draw = NULL; + if (IS_ERR(nop->vbus_draw)) + return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), + "could not get vbus regulator\n"); + nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); if (PTR_ERR(nop->vbus_draw) == -ENODEV) nop->vbus_draw = NULL; -- GitLab From 52af9897e787bdaffa839f5d6fb535448a3c559a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 22 Jan 2024 12:03:17 +0100 Subject: [PATCH 1057/1418] tty: vt: fix 20 vs 0x20 typo in EScsiignore [ Upstream commit 0e6a92f67c8a94707f7bb27ac29e2bdf3e7c167d ] The if (c >= 20 && c <= 0x3f) test added in commit 7a99565f8732 is wrong. 20 is DC4 in ascii and it makes no sense to consider that as the bottom limit. Instead, it should be 0x20 as in the other test in the commit above. This is supposed to NOT change anything as we handle interesting 20-0x20 asciis far before this if. So for sakeness, change to 0x20 (which is SPACE). Signed-off-by: "Jiri Slaby (SUSE)" Fixes: 7a99565f8732 ("vt: ignore csi sequences with intermediate characters.") Cc: Martin Hostettler Link: https://lore.kernel.org/all/ZaP45QY2WEsDqoxg@neutronstar.dyndns.org/ Tested-by: Helge Deller # parisc STI console Link: https://lore.kernel.org/r/20240122110401.7289-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 981d2bfcf9a5b..9e30ef2b6eb8c 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2515,7 +2515,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } return; case EScsiignore: - if (c >= 20 && c <= 0x3f) + if (c >= 0x20 && c <= 0x3f) return; vc->vc_state = ESnormal; return; -- GitLab From e955764b41676ebaba44b478f4e7e2afb43a0140 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 18 Jan 2024 10:22:01 -0500 Subject: [PATCH 1058/1418] serial: max310x: fix syntax error in IRQ error message [ Upstream commit 8ede8c6f474255b2213cccd7997b993272a8e2f9 ] Replace g with q. Helpful when grepping thru source code or logs for "request" keyword. Fixes: f65444187a66 ("serial: New serial driver MAX310X") Reviewed-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240118152213.2644269-6-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 163a89f84c9c2..444f89eb2d4b7 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1459,7 +1459,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty if (!ret) return 0; - dev_err(dev, "Unable to reguest IRQ %i\n", irq); + dev_err(dev, "Unable to request IRQ %i\n", irq); out_uart: for (i = 0; i < devtype->nr; i++) { -- GitLab From ad7362db2fd718f2c9d12eefa5cb50a0376fc328 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 19 Jan 2024 10:45:08 +0000 Subject: [PATCH 1059/1418] tty: serial: samsung: fix tx_empty() to return TIOCSER_TEMT [ Upstream commit 314c2b399288f0058a8c5b6683292cbde5f1531b ] The core expects for tx_empty() either TIOCSER_TEMT when the tx is empty or 0 otherwise. s3c24xx_serial_txempty_nofifo() might return 0x4, and at least uart_get_lsr_info() tries to clear exactly TIOCSER_TEMT (BIT(1)). Fix tx_empty() to return TIOCSER_TEMT. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Tudor Ambarus Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20240119104526.1221243-2-tudor.ambarus@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/samsung_tty.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index aa2c51b84116f..589daed19e625 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -996,11 +996,10 @@ static unsigned int s3c24xx_serial_tx_empty(struct uart_port *port) if ((ufstat & info->tx_fifomask) != 0 || (ufstat & info->tx_fifofull)) return 0; - - return 1; + return TIOCSER_TEMT; } - return s3c24xx_serial_txempty_nofifo(port); + return s3c24xx_serial_txempty_nofifo(port) ? TIOCSER_TEMT : 0; } /* no modem control lines */ -- GitLab From 8aa68d9fc22cca4ea6dab7eac2639f090811c248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 11 Jan 2024 12:56:36 +0100 Subject: [PATCH 1060/1418] arm64: dts: broadcom: bcmbca: bcm4908: drop invalid switch cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 27058b95fbb784406ea4c40b20caa3f04937140c ] Ethernet switch does not have addressable subnodes. This fixes: arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dtb: ethernet-switch@0: '#address-cells', '#size-cells' do not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/net/dsa/brcm,sf2.yaml# Fixes: 527a3ac9bdf8 ("arm64: dts: broadcom: bcm4908: describe internal switch") Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20240111115636.12095-1-zajec5@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi index df71348542064..a4c5a38905b03 100644 --- a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi @@ -180,9 +180,6 @@ brcm,num-gphy = <5>; brcm,num-rgmii-ports = <2>; - #address-cells = <1>; - #size-cells = <0>; - ports: ports { #address-cells = <1>; #size-cells = <0>; -- GitLab From c609ce8b3c6fcc3a89184727ae6375d27136528f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 3 Feb 2024 00:57:59 +0900 Subject: [PATCH 1061/1418] kconfig: fix infinite loop when expanding a macro at the end of file [ Upstream commit af8bbce92044dc58e4cc039ab94ee5d470a621f5 ] A macro placed at the end of a file with no newline causes an infinite loop. [Test Kconfig] $(info,hello) \ No newline at end of file I realized that flex-provided input() returns 0 instead of EOF when it reaches the end of a file. Fixes: 104daea149c4 ("kconfig: reference environment variables directly and remove 'option env='") Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/lexer.l | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index cc386e4436834..2c2b3e6f248ca 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -302,8 +302,11 @@ static char *expand_token(const char *in, size_t n) new_string(); append_string(in, n); - /* get the whole line because we do not know the end of token. */ - while ((c = input()) != EOF) { + /* + * get the whole line because we do not know the end of token. + * input() returns 0 (not EOF!) when it reachs the end of file. + */ + while ((c = input()) != 0) { if (c == '\n') { unput(c); break; -- GitLab From bc493a56abf85a264544b8c10b30838bcb3f8613 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 8 Jan 2024 12:19:06 +0000 Subject: [PATCH 1062/1418] hwtracing: hisi_ptt: Move type check to the beginning of hisi_ptt_pmu_event_init() [ Upstream commit 06226d120a28f146abd3637799958a4dc4dbb7a1 ] When perf_init_event() calls perf_try_init_event() to init pmu driver, searches for the next pmu driver only when the return value is -ENOENT. Therefore, hisi_ptt_pmu_event_init() needs to check the type at the beginning of the function. Otherwise, in the case of perf-task mode, perf_try_init_event() returns -EOPNOTSUPP and skips subsequent pmu drivers, causes perf_init_event() to fail. Fixes: ff0de066b463 ("hwtracing: hisi_ptt: Add trace function support for HiSilicon PCIe Tune and Trace device") Signed-off-by: Yang Jihong Reviewed-by: Yicong Yang Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240108121906.3514820-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/ptt/hisi_ptt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 8d8fa8e8afe04..20a9cddb3723a 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -654,6 +654,9 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) int ret; u32 val; + if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) + return -ENOENT; + if (event->cpu < 0) { dev_dbg(event->pmu->dev, "Per-task mode not supported\n"); return -EOPNOTSUPP; @@ -662,9 +665,6 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) if (event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; - if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) - return -ENOENT; - ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config); if (ret < 0) return ret; -- GitLab From bea2dfd5fa3f7a5d1ff8de5599efe846427fdbcd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Feb 2024 21:02:58 -0800 Subject: [PATCH 1063/1418] rtc: mt6397: select IRQ_DOMAIN instead of depending on it [ Upstream commit 544c42f798e1651dcb04fb0395219bf0f1c2607e ] IRQ_DOMAIN is a hidden (not user visible) symbol. Users cannot set it directly thru "make *config", so drivers should select it instead of depending on it if they need it. Relying on it being set for a dependency is risky. Consistently using "select" or "depends on" can also help reduce Kconfig circular dependency issues. Therefore, change the use of "depends on" for IRQ_DOMAIN to "select" for RTC_DRV_MT6397. Fixes: 04d3ba70a3c9 ("rtc: mt6397: add IRQ domain dependency") Cc: Arnd Bergmann Cc: Eddie Huang Cc: Sean Wang Cc: Matthias Brugger Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: linux-rtc@vger.kernel.org Cc: Marc Zyngier Cc: Philipp Zabel Cc: Peter Rosin Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20240213050258.6167-1-rdunlap@infradead.org Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index bb63edb507da4..87dc050ca004c 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1843,7 +1843,8 @@ config RTC_DRV_MT2712 config RTC_DRV_MT6397 tristate "MediaTek PMIC based RTC" - depends on MFD_MT6397 || (COMPILE_TEST && IRQ_DOMAIN) + depends on MFD_MT6397 || COMPILE_TEST + select IRQ_DOMAIN help This selects the MediaTek(R) RTC driver. RTC is part of MediaTek MT6397 PMIC. You should enable MT6397 PMIC MFD before select -- GitLab From d2b48ecc760ae540b298bcf88a050c8bceef9662 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Feb 2024 17:04:57 +0200 Subject: [PATCH 1064/1418] serial: 8250_exar: Don't remove GPIO device on suspend [ Upstream commit 73b5a5c00be39e23b194bad10e1ea8bb73eee176 ] It seems a copy&paste mistake that suspend callback removes the GPIO device. There is no counterpart of this action, means once suspended there is no more GPIO device available untile full unbind-bind cycle is performed. Remove suspicious GPIO device removal in suspend. Fixes: d0aeaa83f0b0 ("serial: exar: split out the exar code from 8250_pci") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240219150627.2101198-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_exar.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index dca1abe363248..55451ff846520 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -714,6 +714,7 @@ static void exar_pci_remove(struct pci_dev *pcidev) for (i = 0; i < priv->nr; i++) serial8250_unregister_port(priv->line[i]); + /* Ensure that every init quirk is properly torn down */ if (priv->board->exit) priv->board->exit(pcidev); } @@ -728,10 +729,6 @@ static int __maybe_unused exar_suspend(struct device *dev) if (priv->line[i] >= 0) serial8250_suspend_port(priv->line[i]); - /* Ensure that every init quirk is properly torn down */ - if (priv->board->exit) - priv->board->exit(pcidev); - return 0; } -- GitLab From 1d4ff6a8f85e78c1a8dec849e781f58162dbabbe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Mar 2024 10:04:48 +0300 Subject: [PATCH 1065/1418] staging: greybus: fix get_channel_from_mode() failure path [ Upstream commit 34164202a5827f60a203ca9acaf2d9f7d432aac8 ] The get_channel_from_mode() function is supposed to return the channel which matches the mode. But it has a bug where if it doesn't find a matching channel then it returns the last channel. It should return NULL instead. Also remove an unnecessary NULL check on "channel". Fixes: 2870b52bae4c ("greybus: lights: add lights implementation") Signed-off-by: Dan Carpenter Reviewed-by: Rui Miguel Silva Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/379c0cb4-39e0-4293-8a18-c7b1298e5420@moroto.mountain Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/greybus/light.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 87d36948c6106..c6bd86a5335ab 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -100,15 +100,15 @@ static struct led_classdev *get_channel_cdev(struct gb_channel *channel) static struct gb_channel *get_channel_from_mode(struct gb_light *light, u32 mode) { - struct gb_channel *channel = NULL; + struct gb_channel *channel; int i; for (i = 0; i < light->channels_count; i++) { channel = &light->channels[i]; - if (channel && channel->mode == mode) - break; + if (channel->mode == mode) + return channel; } - return channel; + return NULL; } static int __gb_lights_flash_intensity_set(struct gb_channel *channel, -- GitLab From 2ca629b90dc9a0b3d6c1125b905fd0e2c886a1f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Mar 2024 18:17:34 +0000 Subject: [PATCH 1066/1418] usb: gadget: net2272: Use irqflags in the call to net2272_probe_fin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 600556809f04eb3bbccd05218215dcd7b285a9a9 ] Currently the variable irqflags is being set but is not being used, it appears it should be used in the call to net2272_probe_fin rather than IRQF_TRIGGER_LOW being used. Kudos to Uwe Kleine-König for suggesting the fix. Cleans up clang scan build warning: drivers/usb/gadget/udc/net2272.c:2610:15: warning: variable 'irqflags' set but not used [-Wunused-but-set-variable] Fixes: ceb80363b2ec ("USB: net2272: driver for PLX NET2272 USB device controller") Signed-off-by: Colin Ian King Acked-by: Alan Stern Link: https://lore.kernel.org/r/20240307181734.2034407-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/net2272.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 538c1b9a28835..c42d5aa99e81a 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2650,7 +2650,7 @@ net2272_plat_probe(struct platform_device *pdev) goto err_req; } - ret = net2272_probe_fin(dev, IRQF_TRIGGER_LOW); + ret = net2272_probe_fin(dev, irqflags); if (ret) goto err_io; -- GitLab From d23e49f4e4c1dc6fc74a2007c58d16ce0f2431b6 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 5 Mar 2024 15:36:28 +0100 Subject: [PATCH 1067/1418] ASoC: rockchip: i2s-tdm: Fix inaccurate sampling rates [ Upstream commit 9e2ab4b18ebd46813fc3459207335af4d368e323 ] The sample rates set by the rockchip_i2s_tdm driver in master mode are inaccurate up to 5% in several cases, due to the driver logic to configure clocks and a nasty interaction with the Common Clock Framework. To understand what happens, here is the relevant section of the clock tree (slightly simplified), along with the names used in the driver: vpll0 _OR_ vpll1 "mclk_root" clk_i2s2_8ch_tx_src "mclk_parent" clk_i2s2_8ch_tx_mux clk_i2s2_8ch_tx "mclk" or "mclk_tx" This is what happens when playing back e.g. at 192 kHz using audio-graph-card (when recording the same applies, only s/tx/rx/): 0. at probe, rockchip_i2s_tdm_set_sysclk() stores the passed frequency in i2s_tdm->mclk_tx_freq (*) which is 50176000, and that is never modified afterwards 1. when playback is started, rockchip_i2s_tdm_hw_params() is called and does the following two calls 2. rockchip_i2s_tdm_calibrate_mclk(): 2a. selects mclk_root0 (vpll0) as a parent for mclk_parent (mclk_tx_src), which is OK because the vpll0 rate is a good for 192000 (and sumbultiple) rates 2b. sets the mclk_root frequency based on ppm calibration computations 2c. sets mclk_tx_src to 49152000 (= 256 * 192000), which is also OK as it is a multiple of the required bit clock 3. rockchip_i2s_tdm_set_mclk() 3a. calls clk_set_rate() to set the rate of mclk_tx (clk_i2s2_8ch_tx) to the value of i2s_tdm->mclk_tx_freq (*), i.e. 50176000 which is not a multiple of the sampling frequency -- this is not OK 3a1. clk_set_rate() reacts by reparenting clk_i2s2_8ch_tx_src to vpll1 -- this is not OK because the default vpll1 rate can be divided to get 44.1 kHz and related rates, not 192 kHz The result is that the driver does a lot of ad-hoc decisions about clocks and ends up in using the wrong parent at an unoptimal rate. Step 0 is one part of the problem: unless the card driver calls set_sysclk at each stream start, whatever rate is set in mclk_tx_freq during boot will be taken and used until reboot. Moreover the driver does not care if its value is not a multiple of any audio frequency. Another part of the problem is that the whole reparenting and clock rate setting logic is conflicting with the CCF algorithms to achieve largely the same goal: selecting the best parent and setting the closest clock rate. And it turns out that only calling once clk_set_rate() on clk_i2s2_8ch_tx picks the correct vpll and sets the correct rate. The fix is based on removing the custom logic in the driver to select the parent and set the various clocks, and just let the Clock Framework do it all. As a side effect, the set_sysclk() op becomes useless because we now let the CCF compute the appropriate value for the sampling rate. It also implies that the whole calibration logic is now dead code and so it is removed along with the "PCM Clock Compensation in PPM" kcontrol, which has always been broken anyway. The handling of the 4 optional clocks also becomes dead code and is removed. The actual rates have been tested playing 30 seconds of audio at various sampling rates before and after this change using sox: time play -r -n synth 30 sine 950 gain -3 The time reported in the table below is the 'real' value reported by the 'time' command in the above command line. rate before after --------- ------ ------ 8000 Hz 30.60s 30.63s 11025 Hz 30.45s 30.51s 16000 Hz 30.47s 30.50s 22050 Hz 30.78s 30.41s 32000 Hz 31.02s 30.43s 44100 Hz 30.78s 30.41s 48000 Hz 29.81s 30.45s 88200 Hz 30.78s 30.41s 96000 Hz 29.79s 30.42s 176400 Hz 27.40s 30.41s 192000 Hz 29.79s 30.42s While the tests are running the clock tree confirms that: * without the patch, vpll1 is always used and clk_i2s2_8ch_tx always produces 50176000 Hz, which cannot be divided for most audio rates except the slowest ones, generating inaccurate rates * with the patch: - for 192000 Hz vpll0 is used - for 176400 Hz vpll1 is used - clk_i2s2_8ch_tx always produces (256 * ) Hz Tested on the RK3308 using the internal audio codec. Fixes: 081068fd6414 ("ASoC: rockchip: add support for i2s-tdm controller") Signed-off-by: Luca Ceresoli Link: https://msgid.link/r/20240305-rk3308-audio-codec-v4-1-312acdbe628f@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/rockchip/rockchip_i2s_tdm.c | 352 +------------------------- 1 file changed, 6 insertions(+), 346 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 2550bd2a5e78c..2e36a97077b99 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -27,8 +27,6 @@ #define DEFAULT_MCLK_FS 256 #define CH_GRP_MAX 4 /* The max channel 8 / 2 */ #define MULTIPLEX_CH_MAX 10 -#define CLK_PPM_MIN -1000 -#define CLK_PPM_MAX 1000 #define TRCM_TXRX 0 #define TRCM_TX 1 @@ -55,20 +53,6 @@ struct rk_i2s_tdm_dev { struct clk *hclk; struct clk *mclk_tx; struct clk *mclk_rx; - /* The mclk_tx_src is parent of mclk_tx */ - struct clk *mclk_tx_src; - /* The mclk_rx_src is parent of mclk_rx */ - struct clk *mclk_rx_src; - /* - * The mclk_root0 and mclk_root1 are root parent and supplies for - * the different FS. - * - * e.g: - * mclk_root0 is VPLL0, used for FS=48000Hz - * mclk_root1 is VPLL1, used for FS=44100Hz - */ - struct clk *mclk_root0; - struct clk *mclk_root1; struct regmap *regmap; struct regmap *grf; struct snd_dmaengine_dai_dma_data capture_dma_data; @@ -78,19 +62,11 @@ struct rk_i2s_tdm_dev { struct rk_i2s_soc_data *soc_data; bool is_master_mode; bool io_multiplex; - bool mclk_calibrate; bool tdm_mode; - unsigned int mclk_rx_freq; - unsigned int mclk_tx_freq; - unsigned int mclk_root0_freq; - unsigned int mclk_root1_freq; - unsigned int mclk_root0_initial_freq; - unsigned int mclk_root1_initial_freq; unsigned int frame_width; unsigned int clk_trcm; unsigned int i2s_sdis[CH_GRP_MAX]; unsigned int i2s_sdos[CH_GRP_MAX]; - int clk_ppm; int refcount; spinlock_t lock; /* xfer lock */ bool has_playback; @@ -116,12 +92,6 @@ static void i2s_tdm_disable_unprepare_mclk(struct rk_i2s_tdm_dev *i2s_tdm) { clk_disable_unprepare(i2s_tdm->mclk_tx); clk_disable_unprepare(i2s_tdm->mclk_rx); - if (i2s_tdm->mclk_calibrate) { - clk_disable_unprepare(i2s_tdm->mclk_tx_src); - clk_disable_unprepare(i2s_tdm->mclk_rx_src); - clk_disable_unprepare(i2s_tdm->mclk_root0); - clk_disable_unprepare(i2s_tdm->mclk_root1); - } } /** @@ -144,29 +114,9 @@ static int i2s_tdm_prepare_enable_mclk(struct rk_i2s_tdm_dev *i2s_tdm) ret = clk_prepare_enable(i2s_tdm->mclk_rx); if (ret) goto err_mclk_rx; - if (i2s_tdm->mclk_calibrate) { - ret = clk_prepare_enable(i2s_tdm->mclk_tx_src); - if (ret) - goto err_mclk_rx; - ret = clk_prepare_enable(i2s_tdm->mclk_rx_src); - if (ret) - goto err_mclk_rx_src; - ret = clk_prepare_enable(i2s_tdm->mclk_root0); - if (ret) - goto err_mclk_root0; - ret = clk_prepare_enable(i2s_tdm->mclk_root1); - if (ret) - goto err_mclk_root1; - } return 0; -err_mclk_root1: - clk_disable_unprepare(i2s_tdm->mclk_root0); -err_mclk_root0: - clk_disable_unprepare(i2s_tdm->mclk_rx_src); -err_mclk_rx_src: - clk_disable_unprepare(i2s_tdm->mclk_tx_src); err_mclk_rx: clk_disable_unprepare(i2s_tdm->mclk_tx); err_mclk_tx: @@ -566,159 +516,6 @@ static void rockchip_i2s_tdm_xfer_resume(struct snd_pcm_substream *substream, I2S_XFER_RXS_START); } -static int rockchip_i2s_tdm_clk_set_rate(struct rk_i2s_tdm_dev *i2s_tdm, - struct clk *clk, unsigned long rate, - int ppm) -{ - unsigned long rate_target; - int delta, ret; - - if (ppm == i2s_tdm->clk_ppm) - return 0; - - if (ppm < 0) - delta = -1; - else - delta = 1; - - delta *= (int)div64_u64((u64)rate * (u64)abs(ppm) + 500000, - 1000000); - - rate_target = rate + delta; - - if (!rate_target) - return -EINVAL; - - ret = clk_set_rate(clk, rate_target); - if (ret) - return ret; - - i2s_tdm->clk_ppm = ppm; - - return 0; -} - -static int rockchip_i2s_tdm_calibrate_mclk(struct rk_i2s_tdm_dev *i2s_tdm, - struct snd_pcm_substream *substream, - unsigned int lrck_freq) -{ - struct clk *mclk_root; - struct clk *mclk_parent; - unsigned int mclk_root_freq; - unsigned int mclk_root_initial_freq; - unsigned int mclk_parent_freq; - unsigned int div, delta; - u64 ppm; - int ret; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - mclk_parent = i2s_tdm->mclk_tx_src; - else - mclk_parent = i2s_tdm->mclk_rx_src; - - switch (lrck_freq) { - case 8000: - case 16000: - case 24000: - case 32000: - case 48000: - case 64000: - case 96000: - case 192000: - mclk_root = i2s_tdm->mclk_root0; - mclk_root_freq = i2s_tdm->mclk_root0_freq; - mclk_root_initial_freq = i2s_tdm->mclk_root0_initial_freq; - mclk_parent_freq = DEFAULT_MCLK_FS * 192000; - break; - case 11025: - case 22050: - case 44100: - case 88200: - case 176400: - mclk_root = i2s_tdm->mclk_root1; - mclk_root_freq = i2s_tdm->mclk_root1_freq; - mclk_root_initial_freq = i2s_tdm->mclk_root1_initial_freq; - mclk_parent_freq = DEFAULT_MCLK_FS * 176400; - break; - default: - dev_err(i2s_tdm->dev, "Invalid LRCK frequency: %u Hz\n", - lrck_freq); - return -EINVAL; - } - - ret = clk_set_parent(mclk_parent, mclk_root); - if (ret) - return ret; - - ret = rockchip_i2s_tdm_clk_set_rate(i2s_tdm, mclk_root, - mclk_root_freq, 0); - if (ret) - return ret; - - delta = abs(mclk_root_freq % mclk_parent_freq - mclk_parent_freq); - ppm = div64_u64((uint64_t)delta * 1000000, (uint64_t)mclk_root_freq); - - if (ppm) { - div = DIV_ROUND_CLOSEST(mclk_root_initial_freq, mclk_parent_freq); - if (!div) - return -EINVAL; - - mclk_root_freq = mclk_parent_freq * round_up(div, 2); - - ret = clk_set_rate(mclk_root, mclk_root_freq); - if (ret) - return ret; - - i2s_tdm->mclk_root0_freq = clk_get_rate(i2s_tdm->mclk_root0); - i2s_tdm->mclk_root1_freq = clk_get_rate(i2s_tdm->mclk_root1); - } - - return clk_set_rate(mclk_parent, mclk_parent_freq); -} - -static int rockchip_i2s_tdm_set_mclk(struct rk_i2s_tdm_dev *i2s_tdm, - struct snd_pcm_substream *substream, - struct clk **mclk) -{ - unsigned int mclk_freq; - int ret; - - if (i2s_tdm->clk_trcm) { - if (i2s_tdm->mclk_tx_freq != i2s_tdm->mclk_rx_freq) { - dev_err(i2s_tdm->dev, - "clk_trcm, tx: %d and rx: %d should be the same\n", - i2s_tdm->mclk_tx_freq, - i2s_tdm->mclk_rx_freq); - return -EINVAL; - } - - ret = clk_set_rate(i2s_tdm->mclk_tx, i2s_tdm->mclk_tx_freq); - if (ret) - return ret; - - ret = clk_set_rate(i2s_tdm->mclk_rx, i2s_tdm->mclk_rx_freq); - if (ret) - return ret; - - /* mclk_rx is also ok. */ - *mclk = i2s_tdm->mclk_tx; - } else { - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - *mclk = i2s_tdm->mclk_tx; - mclk_freq = i2s_tdm->mclk_tx_freq; - } else { - *mclk = i2s_tdm->mclk_rx; - mclk_freq = i2s_tdm->mclk_rx_freq; - } - - ret = clk_set_rate(*mclk, mclk_freq); - if (ret) - return ret; - } - - return 0; -} - static int rockchip_i2s_ch_to_io(unsigned int ch, bool substream_capture) { if (substream_capture) { @@ -849,19 +646,17 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct rk_i2s_tdm_dev *i2s_tdm = to_info(dai); - struct clk *mclk; - int ret = 0; unsigned int val = 0; unsigned int mclk_rate, bclk_rate, div_bclk = 4, div_lrck = 64; + int err; if (i2s_tdm->is_master_mode) { - if (i2s_tdm->mclk_calibrate) - rockchip_i2s_tdm_calibrate_mclk(i2s_tdm, substream, - params_rate(params)); + struct clk *mclk = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? + i2s_tdm->mclk_tx : i2s_tdm->mclk_rx; - ret = rockchip_i2s_tdm_set_mclk(i2s_tdm, substream, &mclk); - if (ret) - return ret; + err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); + if (err) + return err; mclk_rate = clk_get_rate(mclk); bclk_rate = i2s_tdm->frame_width * params_rate(params); @@ -969,96 +764,6 @@ static int rockchip_i2s_tdm_trigger(struct snd_pcm_substream *substream, return 0; } -static int rockchip_i2s_tdm_set_sysclk(struct snd_soc_dai *cpu_dai, int stream, - unsigned int freq, int dir) -{ - struct rk_i2s_tdm_dev *i2s_tdm = to_info(cpu_dai); - - /* Put set mclk rate into rockchip_i2s_tdm_set_mclk() */ - if (i2s_tdm->clk_trcm) { - i2s_tdm->mclk_tx_freq = freq; - i2s_tdm->mclk_rx_freq = freq; - } else { - if (stream == SNDRV_PCM_STREAM_PLAYBACK) - i2s_tdm->mclk_tx_freq = freq; - else - i2s_tdm->mclk_rx_freq = freq; - } - - dev_dbg(i2s_tdm->dev, "The target mclk_%s freq is: %d\n", - stream ? "rx" : "tx", freq); - - return 0; -} - -static int rockchip_i2s_tdm_clk_compensation_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = 1; - uinfo->value.integer.min = CLK_PPM_MIN; - uinfo->value.integer.max = CLK_PPM_MAX; - uinfo->value.integer.step = 1; - - return 0; -} - -static int rockchip_i2s_tdm_clk_compensation_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); - struct rk_i2s_tdm_dev *i2s_tdm = snd_soc_dai_get_drvdata(dai); - - ucontrol->value.integer.value[0] = i2s_tdm->clk_ppm; - - return 0; -} - -static int rockchip_i2s_tdm_clk_compensation_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); - struct rk_i2s_tdm_dev *i2s_tdm = snd_soc_dai_get_drvdata(dai); - int ret = 0, ppm = 0; - int changed = 0; - unsigned long old_rate; - - if (ucontrol->value.integer.value[0] < CLK_PPM_MIN || - ucontrol->value.integer.value[0] > CLK_PPM_MAX) - return -EINVAL; - - ppm = ucontrol->value.integer.value[0]; - - old_rate = clk_get_rate(i2s_tdm->mclk_root0); - ret = rockchip_i2s_tdm_clk_set_rate(i2s_tdm, i2s_tdm->mclk_root0, - i2s_tdm->mclk_root0_freq, ppm); - if (ret) - return ret; - if (old_rate != clk_get_rate(i2s_tdm->mclk_root0)) - changed = 1; - - if (clk_is_match(i2s_tdm->mclk_root0, i2s_tdm->mclk_root1)) - return changed; - - old_rate = clk_get_rate(i2s_tdm->mclk_root1); - ret = rockchip_i2s_tdm_clk_set_rate(i2s_tdm, i2s_tdm->mclk_root1, - i2s_tdm->mclk_root1_freq, ppm); - if (ret) - return ret; - if (old_rate != clk_get_rate(i2s_tdm->mclk_root1)) - changed = 1; - - return changed; -} - -static struct snd_kcontrol_new rockchip_i2s_tdm_compensation_control = { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, - .name = "PCM Clock Compensation in PPM", - .info = rockchip_i2s_tdm_clk_compensation_info, - .get = rockchip_i2s_tdm_clk_compensation_get, - .put = rockchip_i2s_tdm_clk_compensation_put, -}; - static int rockchip_i2s_tdm_dai_probe(struct snd_soc_dai *dai) { struct rk_i2s_tdm_dev *i2s_tdm = snd_soc_dai_get_drvdata(dai); @@ -1068,9 +773,6 @@ static int rockchip_i2s_tdm_dai_probe(struct snd_soc_dai *dai) if (i2s_tdm->has_playback) dai->playback_dma_data = &i2s_tdm->playback_dma_data; - if (i2s_tdm->mclk_calibrate) - snd_soc_add_dai_controls(dai, &rockchip_i2s_tdm_compensation_control, 1); - return 0; } @@ -1110,7 +812,6 @@ static int rockchip_i2s_tdm_set_bclk_ratio(struct snd_soc_dai *dai, static const struct snd_soc_dai_ops rockchip_i2s_tdm_dai_ops = { .hw_params = rockchip_i2s_tdm_hw_params, .set_bclk_ratio = rockchip_i2s_tdm_set_bclk_ratio, - .set_sysclk = rockchip_i2s_tdm_set_sysclk, .set_fmt = rockchip_i2s_tdm_set_fmt, .set_tdm_slot = rockchip_dai_tdm_slot, .trigger = rockchip_i2s_tdm_trigger, @@ -1433,35 +1134,6 @@ static void rockchip_i2s_tdm_path_config(struct rk_i2s_tdm_dev *i2s_tdm, rockchip_i2s_tdm_tx_path_config(i2s_tdm, num); } -static int rockchip_i2s_tdm_get_calibrate_mclks(struct rk_i2s_tdm_dev *i2s_tdm) -{ - int num_mclks = 0; - - i2s_tdm->mclk_tx_src = devm_clk_get(i2s_tdm->dev, "mclk_tx_src"); - if (!IS_ERR(i2s_tdm->mclk_tx_src)) - num_mclks++; - - i2s_tdm->mclk_rx_src = devm_clk_get(i2s_tdm->dev, "mclk_rx_src"); - if (!IS_ERR(i2s_tdm->mclk_rx_src)) - num_mclks++; - - i2s_tdm->mclk_root0 = devm_clk_get(i2s_tdm->dev, "mclk_root0"); - if (!IS_ERR(i2s_tdm->mclk_root0)) - num_mclks++; - - i2s_tdm->mclk_root1 = devm_clk_get(i2s_tdm->dev, "mclk_root1"); - if (!IS_ERR(i2s_tdm->mclk_root1)) - num_mclks++; - - if (num_mclks < 4 && num_mclks != 0) - return -ENOENT; - - if (num_mclks == 4) - i2s_tdm->mclk_calibrate = 1; - - return 0; -} - static int rockchip_i2s_tdm_path_prepare(struct rk_i2s_tdm_dev *i2s_tdm, struct device_node *np, bool is_rx_path) @@ -1609,11 +1281,6 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) i2s_tdm->io_multiplex = of_property_read_bool(node, "rockchip,io-multiplex"); - ret = rockchip_i2s_tdm_get_calibrate_mclks(i2s_tdm); - if (ret) - return dev_err_probe(i2s_tdm->dev, ret, - "mclk-calibrate clocks missing"); - regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(regs)) { return dev_err_probe(i2s_tdm->dev, PTR_ERR(regs), @@ -1666,13 +1333,6 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) goto err_disable_hclk; } - if (i2s_tdm->mclk_calibrate) { - i2s_tdm->mclk_root0_initial_freq = clk_get_rate(i2s_tdm->mclk_root0); - i2s_tdm->mclk_root1_initial_freq = clk_get_rate(i2s_tdm->mclk_root1); - i2s_tdm->mclk_root0_freq = i2s_tdm->mclk_root0_initial_freq; - i2s_tdm->mclk_root1_freq = i2s_tdm->mclk_root1_initial_freq; - } - pm_runtime_enable(&pdev->dev); regmap_update_bits(i2s_tdm->regmap, I2S_DMACR, I2S_DMACR_TDL_MASK, -- GitLab From d2034a6b92cafb70097eb363edad77f394e0079b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 11 Mar 2024 17:20:37 +1000 Subject: [PATCH 1068/1418] nouveau: reset the bo resource bus info after an eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f35c9af45ea7a4b1115b193d84858b14d13517fc ] Later attempts to refault the bo won't happen and the whole GPU does to lunch. I think Christian's refactoring of this code out to the driver broke this not very well tested path. Fixes: 141b15e59175 ("drm/nouveau: move io_reserve_lru handling into the driver v5") Cc: Christian König Signed-off-by: Dave Airlie Acked-by: Christian König Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240311072037.287905-1-airlied@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 126b3c6e12f99..f2dca41e46c5f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1194,6 +1194,8 @@ out: drm_vma_node_unmap(&nvbo->bo.base.vma_node, bdev->dev_mapping); nouveau_ttm_io_mem_free_locked(drm, nvbo->bo.resource); + nvbo->bo.resource->bus.offset = 0; + nvbo->bo.resource->bus.addr = NULL; goto retry; } -- GitLab From 6df9cf77e0c7c2fe246844ddc545a6f8d3704865 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Mar 2024 12:01:21 -0800 Subject: [PATCH 1069/1418] tcp: Fix NEW_SYN_RECV handling in inet_twsk_purge() [ Upstream commit 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 ] inet_twsk_purge() uses rcu to find TIME_WAIT and NEW_SYN_RECV objects to purge. These objects use SLAB_TYPESAFE_BY_RCU semantic and need special care. We need to use refcount_inc_not_zero(&sk->sk_refcnt). Reuse the existing correct logic I wrote for TIME_WAIT, because both structures have common locations for sk_state, sk_family, and netns pointer. If after the refcount_inc_not_zero() the object fields longer match the keys, use sock_gen_put(sk) to release the refcount. Then we can call inet_twsk_deschedule_put() for TIME_WAIT, inet_csk_reqsk_queue_drop_and_put() for NEW_SYN_RECV sockets, with BH disabled. Then we need to restart the loop because we had drop rcu_read_lock(). Fixes: 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()") Link: https://lore.kernel.org/netdev/CANn89iLvFuuihCtt9PME2uS1WJATnf5fKjDToa1WzVnRzHnPfg@mail.gmail.com/T/#u Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308200122.64357-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_timewait_sock.c | 41 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1d77d992e6e77..340a8f0c29800 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -281,12 +281,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); +/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { - struct inet_timewait_sock *tw; - struct sock *sk; struct hlist_nulls_node *node; unsigned int slot; + struct sock *sk; for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; @@ -295,38 +295,35 @@ restart_rcu: rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) { - /* A kernel listener socket might not hold refcnt for net, - * so reqsk_timer_handler() could be fired after net is - * freed. Userspace listener and reqsk never exist here. - */ - if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV && - hashinfo->pernet)) { - struct request_sock *req = inet_reqsk(sk); - - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); - } + int state = inet_sk_state_load(sk); + if ((1 << state) & ~(TCPF_TIME_WAIT | + TCPF_NEW_SYN_RECV)) continue; - } - tw = inet_twsk(sk); - if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count)) + if (sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count)) continue; - if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) continue; - if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count))) { - inet_twsk_put(tw); + if (unlikely(sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count))) { + sock_gen_put(sk); goto restart; } rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule_put(tw); + if (state == TCP_TIME_WAIT) { + inet_twsk_deschedule_put(inet_twsk(sk)); + } else { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + } local_bh_enable(); goto restart_rcu; } -- GitLab From 9905a157048f441f1412e7bd13372f4a971d75c6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 8 Mar 2024 12:01:22 -0800 Subject: [PATCH 1070/1418] rds: tcp: Fix use-after-free of net in reqsk_timer_handler(). [ Upstream commit 2a750d6a5b365265dbda33330a6188547ddb5c24 ] syzkaller reported a warning of netns tracker [0] followed by KASAN splat [1] and another ref tracker warning [1]. syzkaller could not find a repro, but in the log, the only suspicious sequence was as follows: 18:26:22 executing program 1: r0 = socket$inet6_mptcp(0xa, 0x1, 0x106) ... connect$inet6(r0, &(0x7f0000000080)={0xa, 0x4001, 0x0, @loopback}, 0x1c) (async) The notable thing here is 0x4001 in connect(), which is RDS_TCP_PORT. So, the scenario would be: 1. unshare(CLONE_NEWNET) creates a per netns tcp listener in rds_tcp_listen_init(). 2. syz-executor connect()s to it and creates a reqsk. 3. syz-executor exit()s immediately. 4. netns is dismantled. [0] 5. reqsk timer is fired, and UAF happens while freeing reqsk. [1] 6. listener is freed after RCU grace period. [2] Basically, reqsk assumes that the listener guarantees netns safety until all reqsk timers are expired by holding the listener's refcount. However, this was not the case for kernel sockets. Commit 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()") fixed this issue only for per-netns ehash. Let's apply the same fix for the global ehash. [0]: ref_tracker: net notrefcnt@0000000065449cc3 has 1/1 users at sk_alloc (./include/net/net_namespace.h:337 net/core/sock.c:2146) inet6_create (net/ipv6/af_inet6.c:192 net/ipv6/af_inet6.c:119) __sock_create (net/socket.c:1572) rds_tcp_listen_init (net/rds/tcp_listen.c:279) rds_tcp_init_net (net/rds/tcp.c:577) ops_init (net/core/net_namespace.c:137) setup_net (net/core/net_namespace.c:340) copy_net_ns (net/core/net_namespace.c:497) create_new_namespaces (kernel/nsproxy.c:110) unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) ksys_unshare (kernel/fork.c:3429) __x64_sys_unshare (kernel/fork.c:3496) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) ... WARNING: CPU: 0 PID: 27 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179) [1]: BUG: KASAN: slab-use-after-free in inet_csk_reqsk_queue_drop (./include/net/inet_hashtables.h:180 net/ipv4/inet_connection_sock.c:952 net/ipv4/inet_connection_sock.c:966) Read of size 8 at addr ffff88801b370400 by task swapper/0/0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) print_report (mm/kasan/report.c:378 mm/kasan/report.c:488) kasan_report (mm/kasan/report.c:603) inet_csk_reqsk_queue_drop (./include/net/inet_hashtables.h:180 net/ipv4/inet_connection_sock.c:952 net/ipv4/inet_connection_sock.c:966) reqsk_timer_handler (net/ipv4/inet_connection_sock.c:979 net/ipv4/inet_connection_sock.c:1092) call_timer_fn (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/timer.h:127 kernel/time/timer.c:1701) __run_timers.part.0 (kernel/time/timer.c:1752 kernel/time/timer.c:2038) run_timer_softirq (kernel/time/timer.c:2053) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632 kernel/softirq.c:644) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1076 (discriminator 14)) Allocated by task 258 on cpu 0 at 83.612050s: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (mm/kasan/common.c:68) __kasan_slab_alloc (mm/kasan/common.c:343) kmem_cache_alloc (mm/slub.c:3813 mm/slub.c:3860 mm/slub.c:3867) copy_net_ns (./include/linux/slab.h:701 net/core/net_namespace.c:421 net/core/net_namespace.c:480) create_new_namespaces (kernel/nsproxy.c:110) unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) ksys_unshare (kernel/fork.c:3429) __x64_sys_unshare (kernel/fork.c:3496) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) Freed by task 27 on cpu 0 at 329.158864s: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (mm/kasan/common.c:68) kasan_save_free_info (mm/kasan/generic.c:643) __kasan_slab_free (mm/kasan/common.c:265) kmem_cache_free (mm/slub.c:4299 mm/slub.c:4363) cleanup_net (net/core/net_namespace.c:456 net/core/net_namespace.c:446 net/core/net_namespace.c:639) process_one_work (kernel/workqueue.c:2638) worker_thread (kernel/workqueue.c:2700 kernel/workqueue.c:2787) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:250) The buggy address belongs to the object at ffff88801b370000 which belongs to the cache net_namespace of size 4352 The buggy address is located 1024 bytes inside of freed 4352-byte region [ffff88801b370000, ffff88801b371100) [2]: WARNING: CPU: 0 PID: 95 at lib/ref_tracker.c:228 ref_tracker_free (lib/ref_tracker.c:228 (discriminator 1)) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:ref_tracker_free (lib/ref_tracker.c:228 (discriminator 1)) ... Call Trace: __sk_destruct (./include/net/net_namespace.h:353 net/core/sock.c:2204) rcu_core (./arch/x86/include/asm/preempt.h:26 kernel/rcu/tree.c:2165 kernel/rcu/tree.c:2433) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632 kernel/softirq.c:644) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1076 (discriminator 14)) Reported-by: syzkaller Suggested-by: Eric Dumazet Fixes: 467fa15356ac ("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308200122.64357-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_minisocks.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 42844d20da020..b3bfa1a09df68 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -357,10 +357,6 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) /* Even if tw_refcount == 1, we must clean up kernel reqsk */ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); } else if (!purged_once) { - /* The last refcount is decremented in tcp_sk_exit_batch() */ - if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) - continue; - inet_twsk_purge(&tcp_hashinfo, family); purged_once = true; } -- GitLab From ec0e06c797a0582a22fa4eecfe13f29b87b15c17 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Tue, 12 Mar 2024 12:36:22 +0530 Subject: [PATCH 1071/1418] octeontx2-af: Use matching wake_up API variant in CGX command interface [ Upstream commit e642921dfeed1e15e73f78f2c3b6746f72b6deb2 ] Use wake_up API instead of wake_up_interruptible, since wait_event_timeout API is used for waiting on command completion. Fixes: 1463f382f58d ("octeontx2-af: Add support for CGX link management") Signed-off-by: Linu Cherian Signed-off-by: Sunil Goutham Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 90be87dc105d3..e6fe599f7bf3a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1346,7 +1346,7 @@ static irqreturn_t cgx_fwi_event_handler(int irq, void *data) /* Release thread waiting for completion */ lmac->cmd_pend = false; - wake_up_interruptible(&lmac->wq_cmd_cmplt); + wake_up(&lmac->wq_cmd_cmplt); break; case CGX_EVT_ASYNC: if (cgx_event_is_linkevent(event)) -- GitLab From abc9b13fd9217d2eec297c74e5dc6653c16d3ddb Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Wed, 6 Mar 2024 12:31:52 +0100 Subject: [PATCH 1072/1418] s390/vtime: fix average steal time calculation [ Upstream commit 367c50f78451d3bd7ad70bc5c89f9ba6dec46ca9 ] Current average steal timer calculation produces volatile and inflated values. The only user of this value is KVM so far and it uses that to decide whether or not to yield the vCPU which is seeing steal time. KVM compares average steal timer to a threshold and if the threshold is past then it does not allow CPU polling and yields it to host, else it keeps the CPU by polling. Since KVM's steal time threshold is very low by default (%10) it most likely is not effected much by the bloated average steal timer values because the operating region is pretty small. However there might be new users in the future who might rely on this number. Fix average steal timer calculation by changing the formula from: avg_steal_timer = avg_steal_timer / 2 + steal_timer; to the following: avg_steal_timer = (avg_steal_timer + steal_timer) / 2; This ensures that avg_steal_timer is actually a naive average of steal timer values. It now closely follows steal timer values but of course in a smoother manner. Fixes: 152e9b8676c6 ("s390/vtime: steal time exponential moving average") Signed-off-by: Mete Durlu Acked-by: Heiko Carstens Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/vtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9436f3053b88c..003c926a0f4de 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -210,13 +210,13 @@ void vtime_flush(struct task_struct *tsk) virt_timer_expire(); steal = S390_lowcore.steal_timer; - avg_steal = S390_lowcore.avg_steal_timer / 2; + avg_steal = S390_lowcore.avg_steal_timer; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } - S390_lowcore.avg_steal_timer = avg_steal; + S390_lowcore.avg_steal_timer = avg_steal / 2; } static u64 vtime_delta(void) -- GitLab From bd2474a45df7c11412c2587de3d4e43760531418 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Mar 2024 20:46:28 +0000 Subject: [PATCH 1073/1418] net/sched: taprio: proper TCA_TAPRIO_TC_ENTRY_INDEX check [ Upstream commit 343041b59b7810f9cdca371f445dd43b35c740b1 ] taprio_parse_tc_entry() is not correctly checking TCA_TAPRIO_TC_ENTRY_INDEX attribute: int tc; // Signed value tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); if (tc >= TC_QOPT_MAX_QUEUE) { NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); return -ERANGE; } syzbot reported that it could fed arbitary negative values: UBSAN: shift-out-of-bounds in net/sched/sch_taprio.c:1722:18 shift exponent -2147418108 is negative CPU: 0 PID: 5066 Comm: syz-executor367 Not tainted 6.8.0-rc7-syzkaller-00136-gc8a5c731fd12 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_shift_out_of_bounds+0x3c7/0x420 lib/ubsan.c:386 taprio_parse_tc_entry net/sched/sch_taprio.c:1722 [inline] taprio_parse_tc_entries net/sched/sch_taprio.c:1768 [inline] taprio_change+0xb87/0x57d0 net/sched/sch_taprio.c:1877 taprio_init+0x9da/0xc80 net/sched/sch_taprio.c:2134 qdisc_create+0x9d4/0x1190 net/sched/sch_api.c:1355 tc_modify_qdisc+0xa26/0x1e40 net/sched/sch_api.c:1776 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6617 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f1b2dea3759 Code: 48 83 c4 28 c3 e8 d7 19 00 00 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd4de452f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f1b2def0390 RCX: 00007f1b2dea3759 RDX: 0000000000000000 RSI: 00000000200007c0 RDI: 0000000000000004 RBP: 0000000000000003 R08: 0000555500000000 R09: 0000555500000000 R10: 0000555500000000 R11: 0000000000000246 R12: 00007ffd4de45340 R13: 00007ffd4de45310 R14: 0000000000000001 R15: 00007ffd4de45340 Fixes: a54fc09e4cba ("net/sched: taprio: allow user input of per-tc max SDU") Reported-and-tested-by: syzbot+a340daa06412d6028918@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Vladimir Oltean Reviewed-by: Michal Kubiak Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d5eebb2dd1b1..1d4638aa4254f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -765,7 +765,8 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { - [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, + TC_QOPT_MAX_QUEUE), [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, }; -- GitLab From 1ff7ffcac109edb7542a1716756de4e4192d42f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Feb 2024 20:34:36 +0100 Subject: [PATCH 1074/1418] soc: fsl: dpio: fix kcalloc() argument order [ Upstream commit 72ebb41b88f9d7c10c5e159e0507074af0a22fe2 ] A previous bugfix added a call to kcalloc(), which starting in gcc-14 causes a harmless warning about the argument order: drivers/soc/fsl/dpio/dpio-service.c: In function 'dpaa2_io_service_enqueue_multiple_desc_fq': drivers/soc/fsl/dpio/dpio-service.c:526:29: error: 'kcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 526 | ed = kcalloc(sizeof(struct qbman_eq_desc), 32, GFP_KERNEL); | ^~~~~~ drivers/soc/fsl/dpio/dpio-service.c:526:29: note: earlier argument should specify number of elements, later size of each element Since the two are only multiplied, the order does not change the behavior, so just fix it now to shut up the compiler warning. Dmity independently came up with the same fix. Fixes: 5c4a5999b245 ("soc: fsl: dpio: avoid stack usage warning") Reported-by: Dmitry Antipov Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/soc/fsl/dpio/dpio-service.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 1d2b27e3ea63f..b811446e0fa55 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -523,7 +523,7 @@ int dpaa2_io_service_enqueue_multiple_desc_fq(struct dpaa2_io *d, struct qbman_eq_desc *ed; int i, ret; - ed = kcalloc(sizeof(struct qbman_eq_desc), 32, GFP_KERNEL); + ed = kcalloc(32, sizeof(struct qbman_eq_desc), GFP_KERNEL); if (!ed) return -ENOMEM; -- GitLab From 86d9b040421bbd26425f5a3edc226f57ecdecbfe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 8 Mar 2024 12:16:23 -0800 Subject: [PATCH 1075/1418] tcp: Fix refcnt handling in __inet_hash_connect(). [ Upstream commit 04d9d1fc428ac9f581d55118d67e0cb546701feb ] syzbot reported a warning in sk_nulls_del_node_init_rcu(). The commit 66b60b0c8c4a ("dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished().") tried to fix an issue that an unconnected socket occupies an ehash entry when bhash2 allocation fails. In such a case, we need to revert changes done by check_established(), which does not hold refcnt when inserting socket into ehash. So, to revert the change, we need to __sk_nulls_add_node_rcu() instead of sk_nulls_add_node_rcu(). Otherwise, sock_put() will cause refcnt underflow and leak the socket. [0]: WARNING: CPU: 0 PID: 23948 at include/net/sock.h:799 sk_nulls_del_node_init_rcu+0x166/0x1a0 include/net/sock.h:799 Modules linked in: CPU: 0 PID: 23948 Comm: syz-executor.2 Not tainted 6.8.0-rc6-syzkaller-00159-gc055fc00c07b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:sk_nulls_del_node_init_rcu+0x166/0x1a0 include/net/sock.h:799 Code: e8 7f 71 c6 f7 83 fb 02 7c 25 e8 35 6d c6 f7 4d 85 f6 0f 95 c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc e8 1b 6d c6 f7 90 <0f> 0b 90 eb b2 e8 10 6d c6 f7 4c 89 e7 be 04 00 00 00 e8 63 e7 d2 RSP: 0018:ffffc900032d7848 EFLAGS: 00010246 RAX: ffffffff89cd0035 RBX: 0000000000000001 RCX: 0000000000040000 RDX: ffffc90004de1000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 1ffff1100439ac26 R08: ffffffff89ccffe3 R09: 1ffff1100439ac28 R10: dffffc0000000000 R11: ffffed100439ac29 R12: ffff888021cd6140 R13: dffffc0000000000 R14: ffff88802a9bf5c0 R15: ffff888021cd6130 FS: 00007f3b823f16c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3b823f0ff8 CR3: 000000004674a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __inet_hash_connect+0x140f/0x20b0 net/ipv4/inet_hashtables.c:1139 dccp_v6_connect+0xcb9/0x1480 net/dccp/ipv6.c:956 __inet_stream_connect+0x262/0xf30 net/ipv4/af_inet.c:678 inet_stream_connect+0x65/0xa0 net/ipv4/af_inet.c:749 __sys_connect_file net/socket.c:2048 [inline] __sys_connect+0x2df/0x310 net/socket.c:2065 __do_sys_connect net/socket.c:2075 [inline] __se_sys_connect net/socket.c:2072 [inline] __x64_sys_connect+0x7a/0x90 net/socket.c:2072 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f3b8167dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3b823f10c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00007f3b817abf80 RCX: 00007f3b8167dda9 RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000003 RBP: 00007f3b823f1120 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f3b817abf80 R15: 00007ffd3beb57b8 Reported-by: syzbot+12c506c1aae251e70449@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=12c506c1aae251e70449 Fixes: 66b60b0c8c4a ("dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished().") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308201623.65448-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 56776e1b1de52..0ad25e6783ac7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1117,7 +1117,7 @@ error: sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(lock); - sk_nulls_del_node_init_rcu(sk); + __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); sk->sk_hash = 0; -- GitLab From 1ed222ca7396938eb1ab2d034f1ba0d8b00a7122 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 13 Mar 2024 00:27:19 +0900 Subject: [PATCH 1076/1418] hsr: Fix uninit-value access in hsr_get_node() [ Upstream commit ddbec99f58571301679addbc022256970ca3eac6 ] KMSAN reported the following uninit-value access issue [1]: ===================================================== BUG: KMSAN: uninit-value in hsr_get_node+0xa2e/0xa40 net/hsr/hsr_framereg.c:246 hsr_get_node+0xa2e/0xa40 net/hsr/hsr_framereg.c:246 fill_frame_info net/hsr/hsr_forward.c:577 [inline] hsr_forward_skb+0xe12/0x30e0 net/hsr/hsr_forward.c:615 hsr_dev_xmit+0x1a1/0x270 net/hsr/hsr_device.c:223 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3564 __dev_queue_xmit+0x33b8/0x5130 net/core/dev.c:4349 dev_queue_xmit include/linux/netdevice.h:3134 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x8b1d/0x9f30 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6334 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2787 packet_alloc_skb net/packet/af_packet.c:2936 [inline] packet_snd net/packet/af_packet.c:3030 [inline] packet_sendmsg+0x70e8/0x9f30 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 1 PID: 5033 Comm: syz-executor334 Not tainted 6.7.0-syzkaller-00562-g9f8413c4a66f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 ===================================================== If the packet type ID field in the Ethernet header is either ETH_P_PRP or ETH_P_HSR, but it is not followed by an HSR tag, hsr_get_skb_sequence_nr() reads an invalid value as a sequence number. This causes the above issue. This patch fixes the issue by returning NULL if the Ethernet header is not followed by an HSR tag. Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Reported-and-tested-by: syzbot+2ef3a8ce8e91b5a50098@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2ef3a8ce8e91b5a50098 [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20240312152719.724530-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/hsr/hsr_framereg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 0b01998780952..e44a039e36afe 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -235,6 +235,10 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, */ if (ethhdr->h_proto == htons(ETH_P_PRP) || ethhdr->h_proto == htons(ETH_P_HSR)) { + /* Check if skb contains hsr_ethhdr */ + if (skb->mac_len < sizeof(struct hsr_ethhdr)) + return NULL; + /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ -- GitLab From cb8ae8e5ec286b790555e523b60eeadf675cdc64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:27:07 +0100 Subject: [PATCH 1077/1418] nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers [ Upstream commit b794d1c2ad6d7921f2867ce393815ad31b5b5a83 ] The reserved_tags are only needed for fabrics controllers. Right now only fabrics drivers call this helper, so this is harmless, but we'll use it in the PCIe driver soon. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Stable-dep-of: de105068fead ("nvme: fix reconnection fail due to reserved tag allocation") Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0c088db944706..98a8d90feb37d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5029,7 +5029,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; + if (ctrl->ops->flags & NVME_F_FABRICS) + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) -- GitLab From 1883ed12d702fdd0c49fae0351cf0060b15167a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:28:48 +0100 Subject: [PATCH 1078/1418] nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set [ Upstream commit 93b24f579c392bac2e491fee79ad5ce5a131992e ] Add the apple shared tag workaround to nvme_alloc_io_tag_set to prepare for using that helper in the PCIe driver. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Stable-dep-of: de105068fead ("nvme: fix reconnection fail due to reserved tag allocation") Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 98a8d90feb37d..951c8946701aa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5029,7 +5029,13 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - if (ctrl->ops->flags & NVME_F_FABRICS) + /* + * Some Apple controllers requires tags to be unique across admin and + * the (only) I/O queue, so reserve the first 32 tags of the I/O queue. + */ + if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) + set->reserved_tags = NVME_AQ_DEPTH; + else if (ctrl->ops->flags & NVME_F_FABRICS) set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; -- GitLab From 149afee5c7418ec5db9d7387b9c9a5c1eb7ea2a8 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Mon, 11 Mar 2024 10:09:27 +0800 Subject: [PATCH 1079/1418] nvme: fix reconnection fail due to reserved tag allocation [ Upstream commit de105068fead55ed5c07ade75e9c8e7f86a00d1d ] We found a issue on production environment while using NVMe over RDMA, admin_q reconnect failed forever while remote target and network is ok. After dig into it, we found it may caused by a ABBA deadlock due to tag allocation. In my case, the tag was hold by a keep alive request waiting inside admin_q, as we quiesced admin_q while reset ctrl, so the request maked as idle and will not process before reset success. As fabric_q shares tagset with admin_q, while reconnect remote target, we need a tag for connect command, but the only one reserved tag was held by keep alive command which waiting inside admin_q. As a result, we failed to reconnect admin_q forever. In order to fix this issue, I think we should keep two reserved tags for admin queue. Fixes: ed01fee283a0 ("nvme-fabrics: only reserve a single tag") Signed-off-by: Chunguang Xu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 6 ++++-- drivers/nvme/host/fabrics.h | 7 ------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 951c8946701aa..d7516e99275b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4971,7 +4971,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, set->ops = ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect and keep alive */ + set->reserved_tags = 2; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_NO_SCHED; if (ctrl->ops->flags & NVME_F_BLOCKING) @@ -5036,7 +5037,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) set->reserved_tags = NVME_AQ_DEPTH; else if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect */ + set->reserved_tags = 1; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index dcac3df8a5f76..60c238caf7a97 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -18,13 +18,6 @@ /* default is -1: the fail fast mechanism is disabled */ #define NVMF_DEF_FAIL_FAST_TMO -1 -/* - * Reserved one command for internal usage. This command is used for sending - * the connect command, as well as for the keep alive command on the admin - * queue once live. - */ -#define NVMF_RESERVED_TAGS 1 - /* * Define a host as seen by the target. We allocate one at boot, but also * allow the override it when creating controllers. This is both to provide -- GitLab From 6b62bad2da1b338f452a9380639fc9b093d75a25 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 13 Mar 2024 22:50:18 +0000 Subject: [PATCH 1080/1418] net: mediatek: mtk_eth_soc: clear MAC_MCR_FORCE_LINK only when MAC is up [ Upstream commit f1b85ef15a99f06ed48871ce933d591127d2dcc0 ] Clearing bit MAC_MCR_FORCE_LINK which forces the link down too early can result in MAC ending up in a broken/blocked state. Fix this by handling this bit in the .mac_link_up and .mac_link_down calls instead of in .mac_finish. Fixes: b8fc9f30821e ("net: ethernet: mediatek: Add basic PHYLINK support") Suggested-by: Mason-cw Chang Signed-off-by: Daniel Golle Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 17e6ac4445afc..fecf3dd22dfaa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -561,8 +561,7 @@ static int mtk_mac_finish(struct phylink_config *config, unsigned int mode, mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | - MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK | - MAC_MCR_RX_FIFO_CLR_DIS; + MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_RX_FIFO_CLR_DIS; /* Only update control register when needed! */ if (mcr_new != mcr_cur) @@ -610,7 +609,7 @@ static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode, phylink_config); u32 mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); - mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN); + mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK); mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } @@ -649,7 +648,7 @@ static void mtk_mac_link_up(struct phylink_config *config, if (rx_pause) mcr |= MAC_MCR_FORCE_RX_FC; - mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN; + mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK; mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } -- GitLab From f78807362828ad01db2a9ed005bf79501b620f27 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 13 Mar 2024 22:50:40 +0000 Subject: [PATCH 1081/1418] net: ethernet: mtk_eth_soc: fix PPE hanging issue [ Upstream commit ea80e3ed09ab2c2b75724faf5484721753e92c31 ] A patch to resolve an issue was found in MediaTek's GPL-licensed SDK: In the mtk_ppe_stop() function, the PPE scan mode is not disabled before disabling the PPE. This can potentially lead to a hang during the process of disabling the PPE. Without this patch, the PPE may experience a hang during the reboot test. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/b40da332dfe763932a82f9f62a4709457a15dd6c Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") Suggested-by: Bc-bocun Chen Signed-off-by: Daniel Golle Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_ppe.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index d6eed204574a9..c64211e22ae70 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -811,7 +811,7 @@ void mtk_ppe_start(struct mtk_ppe *ppe) MTK_PPE_KEEPALIVE_DISABLE) | FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) | FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, - MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) | + MTK_PPE_SCAN_MODE_CHECK_AGE) | FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM, MTK_PPE_ENTRIES_SHIFT); if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2)) @@ -895,17 +895,21 @@ int mtk_ppe_stop(struct mtk_ppe *ppe) mtk_ppe_cache_enable(ppe, false); - /* disable offload engine */ - ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); - ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); - /* disable aging */ val = MTK_PPE_TB_CFG_AGE_NON_L4 | MTK_PPE_TB_CFG_AGE_UNBIND | MTK_PPE_TB_CFG_AGE_TCP | MTK_PPE_TB_CFG_AGE_UDP | - MTK_PPE_TB_CFG_AGE_TCP_FIN; + MTK_PPE_TB_CFG_AGE_TCP_FIN | + MTK_PPE_TB_CFG_SCAN_MODE; ppe_clear(ppe, MTK_PPE_TB_CFG, val); - return mtk_ppe_wait_busy(ppe); + if (mtk_ppe_wait_busy(ppe)) + return -ETIMEDOUT; + + /* disable offload engine */ + ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); + ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); + + return 0; } -- GitLab From ef7eed7e11d23337310ecc2c014ecaeea52719c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2024 14:18:16 +0000 Subject: [PATCH 1082/1418] packet: annotate data-races around ignore_outgoing [ Upstream commit 6ebfad33161afacb3e1e59ed1c2feefef70f9f97 ] ignore_outgoing is read locklessly from dev_queue_xmit_nit() and packet_getsockopt() Add appropriate READ_ONCE()/WRITE_ONCE() annotations. syzbot reported: BUG: KCSAN: data-race in dev_queue_xmit_nit / packet_setsockopt write to 0xffff888107804542 of 1 bytes by task 22618 on cpu 0: packet_setsockopt+0xd83/0xfd0 net/packet/af_packet.c:4003 do_sock_setsockopt net/socket.c:2311 [inline] __sys_setsockopt+0x1d8/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2340 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff888107804542 of 1 bytes by task 27 on cpu 1: dev_queue_xmit_nit+0x82/0x620 net/core/dev.c:2248 xmit_one net/core/dev.c:3527 [inline] dev_hard_start_xmit+0xcc/0x3f0 net/core/dev.c:3547 __dev_queue_xmit+0xf24/0x1dd0 net/core/dev.c:4335 dev_queue_xmit include/linux/netdevice.h:3091 [inline] batadv_send_skb_packet+0x264/0x300 net/batman-adv/send.c:108 batadv_send_broadcast_skb+0x24/0x30 net/batman-adv/send.c:127 batadv_iv_ogm_send_to_if net/batman-adv/bat_iv_ogm.c:392 [inline] batadv_iv_ogm_emit net/batman-adv/bat_iv_ogm.c:420 [inline] batadv_iv_send_outstanding_bat_ogm_packet+0x3f0/0x4b0 net/batman-adv/bat_iv_ogm.c:1700 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0x465/0x990 kernel/workqueue.c:3335 worker_thread+0x526/0x730 kernel/workqueue.c:3416 kthread+0x1d1/0x210 kernel/kthread.c:388 ret_from_fork+0x4b/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 value changed: 0x00 -> 0x01 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 27 Comm: kworker/u8:1 Tainted: G W 6.8.0-syzkaller-08073-g480e035fc4c7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet Fixes: fa788d986a3a ("packet: add sockopt to ignore outgoing packets") Reported-by: syzbot+c669c1136495a2e7c31f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/CANn89i+Z7MfbkBLOv=p7KZ7=K1rKHO4P1OL5LYDCtBiyqsa9oQ@mail.gmail.com/T/#t Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 2 +- net/packet/af_packet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 60619fe8af5fc..9a48a7e26cf46 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2271,7 +2271,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { - if (ptype->ignore_outgoing) + if (READ_ONCE(ptype->ignore_outgoing)) continue; /* Never send packets back to the socket diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c3117350f5fbb..7188ca8d84693 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3981,7 +3981,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (val < 0 || val > 1) return -EINVAL; - po->prot_hook.ignore_outgoing = !!val; + WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: @@ -4110,7 +4110,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_IGNORE_OUTGOING: - val = po->prot_hook.ignore_outgoing; + val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) -- GitLab From d343a618bc3c90de6266efb946195cbd63ea705b Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 13 Mar 2024 19:37:58 +0100 Subject: [PATCH 1083/1418] net: veth: do not manipulate GRO when using XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d7db7775ea2e31502d46427f5efd385afc4ff1eb ] Commit d3256efd8e8b ("veth: allow enabling NAPI even without XDP") tried to fix the fact that GRO was not possible without XDP, because veth did not use NAPI without XDP. However, it also introduced the behaviour that GRO is always enabled, when XDP is enabled. While it might be desired for most cases, it is confusing for the user at best as the GRO flag suddenly changes, when an XDP program is attached. It also introduces some complexities in state management as was partially addressed in commit fe9f801355f0 ("net: veth: clear GRO when clearing XDP even when down"). But the biggest problem is that it is not possible to disable GRO at all, when an XDP program is attached, which might be needed for some use cases. Fix this by not touching the GRO flag on XDP enable/disable as the code already supports switching to NAPI if either GRO or XDP is requested. Link: https://lore.kernel.org/lkml/20240311124015.38106-1-ignat@cloudflare.com/ Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Fixes: fe9f801355f0 ("net: veth: clear GRO when clearing XDP even when down") Signed-off-by: Ignat Korchagin Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/veth.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index dd9f5f1461921..8dcd3b6e143b9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1444,8 +1444,6 @@ static netdev_features_t veth_fix_features(struct net_device *dev, if (peer_priv->_xdp_prog) features &= ~NETIF_F_GSO_SOFTWARE; } - if (priv->_xdp_prog) - features |= NETIF_F_GRO; return features; } @@ -1542,14 +1540,6 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding - * XDP is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } - peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1560,14 +1550,6 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; -- GitLab From be4512b9ac6fc53e1ca8daccbda84f643215c547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 12:28:35 +0300 Subject: [PATCH 1084/1418] net: dsa: mt7530: prevent possible incorrect XTAL frequency selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f490c492e946d8ffbe65ad4efc66de3c5ede30a4 ] On MT7530, the HT_XTAL_FSEL field of the HWTRAP register stores a 2-bit value that represents the frequency of the crystal oscillator connected to the switch IC. The field is populated by the state of the ESW_P4_LED_0 and ESW_P4_LED_0 pins, which is done right after reset is deasserted. ESW_P4_LED_0 ESW_P3_LED_0 Frequency ----------------------------------------- 0 0 Reserved 0 1 20MHz 1 0 40MHz 1 1 25MHz On MT7531, the XTAL25 bit of the STRAP register stores this. The LAN0LED0 pin is used to populate the bit. 25MHz when the pin is high, 40MHz when it's low. These pins are also used with LEDs, therefore, their state can be set to something other than the bootstrapping configuration. For example, a link may be established on port 3 before the DSA subdriver takes control of the switch which would set ESW_P3_LED_0 to high. Currently on mt7530_setup() and mt7531_setup(), 1000 - 1100 usec delay is described between reset assertion and deassertion. Some switch ICs in real life conditions cannot always have these pins set back to the bootstrapping configuration before reset deassertion in this amount of delay. This causes wrong crystal frequency to be selected which puts the switch in a nonfunctional state after reset deassertion. The tests below are conducted on an MT7530 with a 40MHz crystal oscillator by Justin Swartz. With a cable from an active peer connected to port 3 before reset, an incorrect crystal frequency (0b11 = 25MHz) is selected: [1] [3] [5] : : : _____________________________ __________________ ESW_P4_LED_0 |_______| _____________________________ ESW_P3_LED_0 |__________________________ : : : : : : [4]...: : : [2]................: [1] Reset is asserted. [2] Period of 1000 - 1100 usec. [3] Reset is deasserted. [4] Period of 315 usec. HWTRAP register is populated with incorrect XTAL frequency. [5] Signals reflect the bootstrapped configuration. Increase the delay between reset_control_assert() and reset_control_deassert(), and gpiod_set_value_cansleep(priv->reset, 0) and gpiod_set_value_cansleep(priv->reset, 1) to 5000 - 5100 usec. This amount ensures a higher possibility that the switch IC will have these pins back to the bootstrapping configuration before reset deassertion. With a cable from an active peer connected to port 3 before reset, the correct crystal frequency (0b10 = 40MHz) is selected: [1] [2-1] [3] [5] : : : : _____________________________ __________________ ESW_P4_LED_0 |_______| ___________________ _______ ESW_P3_LED_0 |_________| |__________________ : : : : : : [2-2]...: [4]...: [2]................: [1] Reset is asserted. [2] Period of 5000 - 5100 usec. [2-1] ESW_P3_LED_0 goes low. [2-2] Remaining period of 5000 - 5100 usec. [3] Reset is deasserted. [4] Period of 310 usec. HWTRAP register is populated with bootstrapped XTAL frequency. [5] Signals reflect the bootstrapped configuration. ESW_P3_LED_0 low period before reset deassertion: 5000 usec - 5100 usec TEST RESET HOLD # (usec) --------------------- 1 5410 2 5440 3 4375 4 5490 5 5475 6 4335 7 4370 8 5435 9 4205 10 4335 11 3750 12 3170 13 4395 14 4375 15 3515 16 4335 17 4220 18 4175 19 4175 20 4350 Min 3170 Max 5490 Median 4342.500 Avg 4466.500 Revert commit 2920dd92b980 ("net: dsa: mt7530: disable LEDs before reset"). Changing the state of pins via reset assertion is simpler and more efficient than doing so by setting the LED controller off. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Co-developed-by: Justin Swartz Signed-off-by: Justin Swartz Signed-off-by: Arınç ÜNAL Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b988c8a40d536..80b346d4d990f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2187,11 +2187,11 @@ mt7530_setup(struct dsa_switch *ds) */ if (priv->mcm) { reset_control_assert(priv->rstc); - usleep_range(1000, 1100); + usleep_range(5000, 5100); reset_control_deassert(priv->rstc); } else { gpiod_set_value_cansleep(priv->reset, 0); - usleep_range(1000, 1100); + usleep_range(5000, 5100); gpiod_set_value_cansleep(priv->reset, 1); } @@ -2401,11 +2401,11 @@ mt7531_setup(struct dsa_switch *ds) */ if (priv->mcm) { reset_control_assert(priv->rstc); - usleep_range(1000, 1100); + usleep_range(5000, 5100); reset_control_deassert(priv->rstc); } else { gpiod_set_value_cansleep(priv->reset, 0); - usleep_range(1000, 1100); + usleep_range(5000, 5100); gpiod_set_value_cansleep(priv->reset, 1); } -- GitLab From 1d830032bcbace99beb25ed4323ee84079a9a806 Mon Sep 17 00:00:00 2001 From: Arthur Grillo Date: Sat, 16 Mar 2024 13:25:20 -0300 Subject: [PATCH 1085/1418] drm: Fix drm_fixp2int_round() making it add 0.5 [ Upstream commit 807f96abdf14c80f534c78f2d854c2590963345c ] As well noted by Pekka[1], the rounding of drm_fixp2int_round is wrong. To round a number, you need to add 0.5 to the number and floor that, drm_fixp2int_round() is adding 0.0000076. Make it add 0.5. [1]: https://lore.kernel.org/all/20240301135327.22efe0dd.pekka.paalanen@collabora.com/ Fixes: 8b25320887d7 ("drm: Add fixed-point helper to get rounded integer values") Suggested-by: Pekka Paalanen Reviewed-by: Harry Wentland Reviewed-by: Melissa Wen Signed-off-by: Arthur Grillo Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/20240316-drm_fixed-v2-1-c1bc2665b5ed@riseup.net Signed-off-by: Sasha Levin --- include/drm/drm_fixed.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 6230088428cdb..a476a406e5997 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -70,7 +70,6 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) } #define DRM_FIXED_POINT 32 -#define DRM_FIXED_POINT_HALF 16 #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) @@ -89,7 +88,7 @@ static inline int drm_fixp2int(s64 a) static inline int drm_fixp2int_round(s64 a) { - return drm_fixp2int(a + (1 << (DRM_FIXED_POINT_HALF - 1))); + return drm_fixp2int(a + DRM_FIXED_ONE / 2); } static inline int drm_fixp2int_ceil(s64 a) -- GitLab From 80fc9b9c626b7be00b5fb9b0aa025901e1488ea7 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 9 Feb 2024 14:30:07 -0800 Subject: [PATCH 1086/1418] vdpa_sim: reset must not run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9588e7fc511f9c55b9835f14916e90ab940061b7 ] vdpasim_do_reset sets running to true, which is wrong, as it allows vdpasim_kick_vq to post work requests before the device has been configured. To fix, do not set running until VIRTIO_CONFIG_S_DRIVER_OK is set. Fixes: 0c89e2a3a9d0 ("vdpa_sim: Implement suspend vdpa op") Signed-off-by: Steve Sistare Reviewed-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <1707517807-137331-1-git-send-email-steven.sistare@oracle.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 61bde476cf9c8..e7fc25bfdd237 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -120,7 +120,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) for (i = 0; i < vdpasim->dev_attr.nas; i++) vhost_iotlb_reset(&vdpasim->iommu[i]); - vdpasim->running = true; + vdpasim->running = false; spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; @@ -513,6 +513,7 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) spin_lock(&vdpasim->lock); vdpasim->status = status; + vdpasim->running = (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0; spin_unlock(&vdpasim->lock); } -- GitLab From 07b6891ca62ebd532f4d4d4d0eed76ab607c2334 Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 16 Feb 2024 09:25:02 -0500 Subject: [PATCH 1087/1418] vdpa/mlx5: Allow CVQ size changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 749a4016839270163efc36ecddddd01de491a16b ] The MLX driver was not updating its control virtqueue size at set_vq_num and instead always initialized to MLX5_CVQ_MAX_ENT (16) at setup_cvq_vring. Qemu would try to set the size to 64 by default, however, because the CVQ size always was initialized to 16, an error would be thrown when sending >16 control messages (as used-ring entry 17 is initialized to 0). For example, starting a guest with x-svq=on and then executing the following command would produce the error below: # for i in {1..20}; do ifconfig eth0 hw ether XX:xx:XX:xx:XX:XX; done qemu-system-x86_64: Insufficient written data (0) [ 435.331223] virtio_net virtio0: Failed to set mac address by vq command. SIOCSIFHWADDR: Invalid argument Acked-by: Dragos Tatulea Acked-by: Eugenio Pérez Signed-off-by: Jonah Palmer Message-Id: <20240216142502.78095-1-jonah.palmer@oracle.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2b7e796c48897..74d295312466f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -185,8 +185,6 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev); static bool mlx5_vdpa_debug; -#define MLX5_CVQ_MAX_ENT 16 - #define MLX5_LOG_VIO_FLAG(_feature) \ do { \ if (features & BIT_ULL(_feature)) \ @@ -1980,9 +1978,16 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + if (!is_index_valid(mvdev, idx)) return; + if (is_ctrl_vq_idx(mvdev, idx)) { + struct mlx5_control_vq *cvq = &mvdev->cvq; + + cvq->vring.vring.num = num; + return; + } + mvq = &ndev->vqs[idx]; mvq->num_ent = num; } @@ -2512,7 +2517,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) u16 idx = cvq->vring.last_avail_idx; err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, + cvq->vring.vring.num, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); -- GitLab From 45a83b220c83e3c326513269afbf69ae6fc65cce Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 14 Mar 2024 16:49:06 -0600 Subject: [PATCH 1088/1418] wireguard: receive: annotate data-race around receiving_counter.counter [ Upstream commit bba045dc4d996d03dce6fe45726e78a1a1f6d4c3 ] Syzkaller with KCSAN identified a data-race issue when accessing keypair->receiving_counter.counter. Use READ_ONCE() and WRITE_ONCE() annotations to mark the data race as intentional. BUG: KCSAN: data-race in wg_packet_decrypt_worker / wg_packet_rx_poll write to 0xffff888107765888 of 8 bytes by interrupt on cpu 0: counter_validate drivers/net/wireguard/receive.c:321 [inline] wg_packet_rx_poll+0x3ac/0xf00 drivers/net/wireguard/receive.c:461 __napi_poll+0x60/0x3b0 net/core/dev.c:6536 napi_poll net/core/dev.c:6605 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6738 __do_softirq+0xc4/0x279 kernel/softirq.c:553 do_softirq+0x5e/0x90 kernel/softirq.c:454 __local_bh_enable_ip+0x64/0x70 kernel/softirq.c:381 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline] _raw_spin_unlock_bh+0x36/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:396 [inline] ptr_ring_consume_bh include/linux/ptr_ring.h:367 [inline] wg_packet_decrypt_worker+0x6c5/0x700 drivers/net/wireguard/receive.c:499 process_one_work kernel/workqueue.c:2633 [inline] ... read to 0xffff888107765888 of 8 bytes by task 3196 on cpu 1: decrypt_packet drivers/net/wireguard/receive.c:252 [inline] wg_packet_decrypt_worker+0x220/0x700 drivers/net/wireguard/receive.c:501 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2706 worker_thread+0x525/0x730 kernel/workqueue.c:2787 ... Fixes: a9e90d9931f3 ("wireguard: noise: separate receive counter from send counter") Reported-by: syzbot+d1de830e4ecdaac83d89@syzkaller.appspotmail.com Signed-off-by: Nikita Zhandarovich Signed-off-by: Jason A. Donenfeld Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wireguard/receive.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index a176653c88616..db01ec03bda00 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -251,7 +251,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || - keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { + READ_ONCE(keypair->receiving_counter.counter) >= REJECT_AFTER_MESSAGES)) { WRITE_ONCE(keypair->receiving.is_valid, false); return false; } @@ -318,7 +318,7 @@ static bool counter_validate(struct noise_replay_counter *counter, u64 their_cou for (i = 1; i <= top; ++i) counter->backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; - counter->counter = their_counter; + WRITE_ONCE(counter->counter, their_counter); } index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; @@ -463,7 +463,7 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", peer->device->dev->name, PACKET_CB(skb)->nonce, - keypair->receiving_counter.counter); + READ_ONCE(keypair->receiving_counter.counter)); goto next; } -- GitLab From 52287ed416a10bc3d3e204a3186d9509ab3ee634 Mon Sep 17 00:00:00 2001 From: Yewon Choi Date: Fri, 15 Mar 2024 18:28:38 +0900 Subject: [PATCH 1089/1418] rds: introduce acquire/release ordering in acquire/release_in_xmit() [ Upstream commit 1422f28826d2a0c11e5240b3e951c9e214d8656e ] acquire/release_in_xmit() work as bit lock in rds_send_xmit(), so they are expected to ensure acquire/release memory ordering semantics. However, test_and_set_bit/clear_bit() don't imply such semantics, on top of this, following smp_mb__after_atomic() does not guarantee release ordering (memory barrier actually should be placed before clear_bit()). Instead, we use clear_bit_unlock/test_and_set_bit_lock() here. Fixes: 0f4b1c7e89e6 ("rds: fix rds_send_xmit() serialization") Fixes: 1f9ecd7eacfd ("RDS: Pass rds_conn_path to rds_send_xmit()") Signed-off-by: Yewon Choi Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/ZfQUxnNTO9AJmzwc@libra05 Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/rds/send.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/rds/send.c b/net/rds/send.c index a4ba45c430d81..0005fb43f2dfa 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -103,13 +103,12 @@ EXPORT_SYMBOL_GPL(rds_send_path_reset); static int acquire_in_xmit(struct rds_conn_path *cp) { - return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0; + return test_and_set_bit_lock(RDS_IN_XMIT, &cp->cp_flags) == 0; } static void release_in_xmit(struct rds_conn_path *cp) { - clear_bit(RDS_IN_XMIT, &cp->cp_flags); - smp_mb__after_atomic(); + clear_bit_unlock(RDS_IN_XMIT, &cp->cp_flags); /* * We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk -- GitLab From 87ca3d940f648bfe49d254fd6bc0c42b552e4af4 Mon Sep 17 00:00:00 2001 From: Felix Maurer Date: Fri, 15 Mar 2024 13:04:52 +0100 Subject: [PATCH 1090/1418] hsr: Handle failures in module init [ Upstream commit 3cf28cd492308e5f63ed00b29ea03ca016264376 ] A failure during registration of the netdev notifier was not handled at all. A failure during netlink initialization did not unregister the netdev notifier. Handle failures of netdev notifier registration and netlink initialization. Both functions should only return negative values on failure and thereby lead to the hsr module not being loaded. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Felix Maurer Reviewed-by: Shigeru Yoshida Reviewed-by: Breno Leitao Link: https://lore.kernel.org/r/3ce097c15e3f7ace98fc7fd9bcbf299f092e63d1.1710504184.git.fmaurer@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/hsr/hsr_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b099c31501509..257b50124cee5 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -148,14 +148,21 @@ static struct notifier_block hsr_nb = { static int __init hsr_init(void) { - int res; + int err; BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN); - register_netdevice_notifier(&hsr_nb); - res = hsr_netlink_init(); + err = register_netdevice_notifier(&hsr_nb); + if (err) + return err; + + err = hsr_netlink_init(); + if (err) { + unregister_netdevice_notifier(&hsr_nb); + return err; + } - return res; + return 0; } static void __exit hsr_exit(void) -- GitLab From 6af7c8a2980b859b62acdb47a4d1098e467a9fca Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Fri, 15 Mar 2024 15:35:40 +0100 Subject: [PATCH 1091/1418] ipv4: raw: Fix sending packets from raw sockets via IPsec tunnels [ Upstream commit c9b3b81716c5b92132a6c1d4ac3c48a7b44082ab ] Since the referenced commit, the xfrm_inner_extract_output() function uses the protocol field to determine the address family. So not setting it for IPv4 raw sockets meant that such packets couldn't be tunneled via IPsec anymore. IPv6 raw sockets are not affected as they already set the protocol since 9c9c9ad5fae7 ("ipv6: set skb->protocol on tcp, raw and ip6_append_data genereated skbs"). Fixes: f4796398f21b ("xfrm: Remove inner/outer modes from output path") Signed-off-by: Tobias Brunner Reviewed-by: David Ahern Reviewed-by: Nicolas Dichtel Link: https://lore.kernel.org/r/c5d9a947-eb19-4164-ac99-468ea814ce20@strongswan.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/raw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7c63b91edbf7a..ee0efd0efec40 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -348,6 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, goto error; skb_reserve(skb, hlen); + skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; -- GitLab From b29a5055eeb0878b0a1fd8afce55795c282e06da Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Fri, 15 Mar 2024 20:50:52 +0300 Subject: [PATCH 1092/1418] net: phy: fix phy_read_poll_timeout argument type in genphy_loopback [ Upstream commit 32fa4366cc4da1c97b725a0066adf43c6b298f37 ] read_poll_timeout inside phy_read_poll_timeout can set val negative in some cases (for example, __mdiobus_read inside phy_read can return -EOPNOTSUPP). Supposedly, commit 4ec732951702 ("net: phylib: fix phy_read*_poll_timeout()") should fix problems with wrong-signed vals, but I do not see how as val is sent to phy_read as is and __val = phy_read (not val) is checked for sign. Change val type for signed to allow better error handling as done in other phy_read_poll_timeout callers. This will not fix any error handling by itself, but allows, for example, to modify cond with appropriate sign check or check resulting val separately. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 014068dcb5b1 ("net: phy: genphy_loopback: add link speed configuration") Signed-off-by: Nikita Kiryushin Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20240315175052.8049-1-kiryushin@ancud.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 45b07004669d6..f25b0d338ca8d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2640,8 +2640,8 @@ EXPORT_SYMBOL(genphy_resume); int genphy_loopback(struct phy_device *phydev, bool enable) { if (enable) { - u16 val, ctl = BMCR_LOOPBACK; - int ret; + u16 ctl = BMCR_LOOPBACK; + int ret, val; ctl |= mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); -- GitLab From 20e21c3c0195d915f33bc7321ee6b362177bf5bf Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 18 Mar 2024 18:35:06 +0100 Subject: [PATCH 1093/1418] dm-integrity: fix a memory leak when rechecking the data [ Upstream commit 55e565c42dce81a4e49c13262d5bc4eb4c2e588a ] Memory for the "checksums" pointer will leak if the data is rechecked after checksum failure (because the associated kfree won't happen due to 'goto skip_io'). Fix this by freeing the checksums memory before recheck, and just use the "checksum_onstack" memory for storing checksum during recheck. Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3da4359f51645..e1bf91faa462b 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1856,12 +1856,12 @@ again: r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { + if (likely(checksums != checksums_onstack)) + kfree(checksums); if (r > 0) { - integrity_recheck(dio, checksums); + integrity_recheck(dio, checksums_onstack); goto skip_io; } - if (likely(checksums != checksums_onstack)) - kfree(checksums); goto error; } -- GitLab From cf7d8cba639ae792a42c2a137b495eac262ac36c Mon Sep 17 00:00:00 2001 From: Thinh Tran Date: Fri, 15 Mar 2024 15:55:35 -0500 Subject: [PATCH 1094/1418] net/bnx2x: Prevent access to a freed page in page_pool [ Upstream commit d27e2da94a42655861ca4baea30c8cd65546f25d ] Fix race condition leading to system crash during EEH error handling During EEH error recovery, the bnx2x driver's transmit timeout logic could cause a race condition when handling reset tasks. The bnx2x_tx_timeout() schedules reset tasks via bnx2x_sp_rtnl_task(), which ultimately leads to bnx2x_nic_unload(). In bnx2x_nic_unload() SGEs are freed using bnx2x_free_rx_sge_range(). However, this could overlap with the EEH driver's attempt to reset the device using bnx2x_io_slot_reset(), which also tries to free SGEs. This race condition can result in system crashes due to accessing freed memory locations in bnx2x_free_rx_sge() 799 static inline void bnx2x_free_rx_sge(struct bnx2x *bp, 800 struct bnx2x_fastpath *fp, u16 index) 801 { 802 struct sw_rx_page *sw_buf = &fp->rx_page_ring[index]; 803 struct page *page = sw_buf->page; .... where sw_buf was set to NULL after the call to dma_unmap_page() by the preceding thread. EEH: Beginning: 'slot_reset' PCI 0011:01:00.0#10000: EEH: Invoking bnx2x->slot_reset() bnx2x: [bnx2x_io_slot_reset:14228(eth1)]IO slot reset initializing... bnx2x 0011:01:00.0: enabling device (0140 -> 0142) bnx2x: [bnx2x_io_slot_reset:14244(eth1)]IO slot reset --> driver unload Kernel attempted to read user page (0) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc0080000025065fc Oops: Kernel access of bad area, sig: 11 [#1] ..... Call Trace: [c000000003c67a20] [c00800000250658c] bnx2x_io_slot_reset+0x204/0x610 [bnx2x] (unreliable) [c000000003c67af0] [c0000000000518a8] eeh_report_reset+0xb8/0xf0 [c000000003c67b60] [c000000000052130] eeh_pe_report+0x180/0x550 [c000000003c67c70] [c00000000005318c] eeh_handle_normal_event+0x84c/0xa60 [c000000003c67d50] [c000000000053a84] eeh_event_handler+0xf4/0x170 [c000000003c67da0] [c000000000194c58] kthread+0x1c8/0x1d0 [c000000003c67e10] [c00000000000cf64] ret_from_kernel_thread+0x5c/0x64 To solve this issue, we need to verify page pool allocations before freeing. Fixes: 4cace675d687 ("bnx2x: Alloc 4k fragment for each rx ring buffer element") Signed-off-by: Thinh Tran Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240315205535.1321-1-thinhtr@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index d8b1824c334d3..0bc1367fd6492 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -1002,9 +1002,6 @@ static inline void bnx2x_set_fw_mac_addr(__le16 *fw_hi, __le16 *fw_mid, static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp, struct bnx2x_alloc_pool *pool) { - if (!pool->page) - return; - put_page(pool->page); pool->page = NULL; @@ -1015,6 +1012,9 @@ static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp, { int i; + if (!fp->page_pool.page) + return; + if (fp->mode == TPA_MODE_DISABLED) return; -- GitLab From ac3f337f0a2ee559514d6c831890c2f642f80c27 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:48 +0530 Subject: [PATCH 1095/1418] octeontx2-af: recover CPT engine when it gets fault [ Upstream commit 07ea567d84cdf0add274d66db7c02b55b818d517 ] When CPT engine has uncorrectable errors, it will get halted and must be disabled and re-enabled. This patch adds code for the same. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 110 +++++++++++++----- 1 file changed, 80 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 38bbae5d9ae05..1ed16ce515bb1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -37,34 +37,60 @@ (_rsp)->free_sts_##etype = free_sts; \ }) -static irqreturn_t rvu_cpt_af_flt_intr_handler(int irq, void *ptr) +static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) { struct rvu_block *block = ptr; struct rvu *rvu = block->rvu; int blkaddr = block->addr; - u64 reg0, reg1, reg2; - - reg0 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(0)); - reg1 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(1)); - if (!is_rvu_otx2(rvu)) { - reg2 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(2)); - dev_err_ratelimited(rvu->dev, - "Received CPTAF FLT irq : 0x%llx, 0x%llx, 0x%llx", - reg0, reg1, reg2); - } else { - dev_err_ratelimited(rvu->dev, - "Received CPTAF FLT irq : 0x%llx, 0x%llx", - reg0, reg1); + u64 reg, val; + int i, eng; + u8 grp; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(vec)); + dev_err_ratelimited(rvu->dev, "Received CPTAF FLT%d irq : 0x%llx", vec, reg); + + i = -1; + while ((i = find_next_bit((unsigned long *)®, 64, i + 1)) < 64) { + switch (vec) { + case 0: + eng = i; + break; + case 1: + eng = i + 64; + break; + case 2: + eng = i + 128; + break; + } + grp = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng)) & 0xFF; + /* Disable and enable the engine which triggers fault */ + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), 0x0); + val = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng)); + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val & ~1ULL); + + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), grp); + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val | 1ULL); } - - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(0), reg0); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(1), reg1); - if (!is_rvu_otx2(rvu)) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(2), reg2); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(vec), reg); return IRQ_HANDLED; } +static irqreturn_t rvu_cpt_af_flt0_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_AF_INT_VEC_FLT0, ptr); +} + +static irqreturn_t rvu_cpt_af_flt1_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_AF_INT_VEC_FLT1, ptr); +} + +static irqreturn_t rvu_cpt_af_flt2_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_10K_AF_INT_VEC_FLT2, ptr); +} + static irqreturn_t rvu_cpt_af_rvu_intr_handler(int irq, void *ptr) { struct rvu_block *block = ptr; @@ -119,8 +145,10 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) int i; /* Disable all CPT AF interrupts */ - for (i = 0; i < CPT_10K_AF_INT_VEC_RVU; i++) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF); + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); @@ -151,7 +179,7 @@ static void cpt_unregister_interrupts(struct rvu *rvu, int blkaddr) /* Disable all CPT AF interrupts */ for (i = 0; i < CPT_AF_INT_VEC_RVU; i++) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), ~0ULL); rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); @@ -172,16 +200,31 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; + irq_handler_t flt_fn; int i, ret; for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); + + switch (i) { + case CPT_10K_AF_INT_VEC_FLT0: + flt_fn = rvu_cpt_af_flt0_intr_handler; + break; + case CPT_10K_AF_INT_VEC_FLT1: + flt_fn = rvu_cpt_af_flt1_intr_handler; + break; + case CPT_10K_AF_INT_VEC_FLT2: + flt_fn = rvu_cpt_af_flt2_intr_handler; + break; + } ret = rvu_cpt_do_register_interrupt(block, off + i, - rvu_cpt_af_flt_intr_handler, - &rvu->irq_name[(off + i) * NAME_SIZE]); + flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + if (i == CPT_10K_AF_INT_VEC_FLT2) + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF); + else + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); } ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, @@ -208,8 +251,8 @@ static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; struct rvu_block *block; + irq_handler_t flt_fn; int i, offs, ret = 0; - char irq_name[16]; if (!is_block_implemented(rvu->hw, blkaddr)) return 0; @@ -226,13 +269,20 @@ static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) return cpt_10k_register_interrupts(block, offs); for (i = CPT_AF_INT_VEC_FLT0; i < CPT_AF_INT_VEC_RVU; i++) { - snprintf(irq_name, sizeof(irq_name), "CPTAF FLT%d", i); + sprintf(&rvu->irq_name[(offs + i) * NAME_SIZE], "CPTAF FLT%d", i); + switch (i) { + case CPT_AF_INT_VEC_FLT0: + flt_fn = rvu_cpt_af_flt0_intr_handler; + break; + case CPT_AF_INT_VEC_FLT1: + flt_fn = rvu_cpt_af_flt1_intr_handler; + break; + } ret = rvu_cpt_do_register_interrupt(block, offs + i, - rvu_cpt_af_flt_intr_handler, - irq_name); + flt_fn, &rvu->irq_name[(offs + i) * NAME_SIZE]); if (ret) goto err; - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); } ret = rvu_cpt_do_register_interrupt(block, offs + CPT_AF_INT_VEC_RVU, -- GitLab From 35d8af38f1993678321ecf5c6f99e128ea3dd944 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:49 +0530 Subject: [PATCH 1096/1418] octeontx2-af: add mbox for CPT LF reset [ Upstream commit f58cf765e8f5f4860ea094aa12c156d9195a4c28 ] On OcteonTX2 SoC, the admin function (AF) is the only one with all priviliges to configure HW and alloc resources, PFs and it's VFs have to request AF via mailbox for all their needs. This patch adds a new mailbox for CPT VFs to request for CPT LF reset. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 8 +++++ .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 33 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 03ebabd616353..5decd1919de03 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -196,6 +196,7 @@ M(CPT_STATS, 0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp) \ M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ msg_rsp) \ M(CPT_CTX_CACHE_SYNC, 0xA07, cpt_ctx_cache_sync, msg_req, msg_rsp) \ +M(CPT_LF_RESET, 0xA08, cpt_lf_reset, cpt_lf_rst_req, msg_rsp) \ /* SDP mbox IDs (range 0x1000 - 0x11FF) */ \ M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \ M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \ @@ -1702,6 +1703,13 @@ struct cpt_inst_lmtst_req { u64 rsvd; }; +/* Mailbox message format to request for CPT LF reset */ +struct cpt_lf_rst_req { + struct mbox_msghdr hdr; + u32 slot; + u32 rsvd; +}; + struct sdp_node_info { /* Node to which this PF belons to */ u8 node_id; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 1ed16ce515bb1..1cd34914cb86b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -851,6 +851,39 @@ int rvu_mbox_handler_cpt_ctx_cache_sync(struct rvu *rvu, struct msg_req *req, return rvu_cpt_ctx_flush(rvu, req->hdr.pcifunc); } +int rvu_mbox_handler_cpt_lf_reset(struct rvu *rvu, struct cpt_lf_rst_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int cptlf, blkaddr, ret; + u16 actual_slot; + u64 ctl, ctl2; + + blkaddr = rvu_get_blkaddr_from_slot(rvu, BLKTYPE_CPT, pcifunc, + req->slot, &actual_slot); + if (blkaddr < 0) + return CPT_AF_ERR_LF_INVALID; + + block = &rvu->hw->block[blkaddr]; + + cptlf = rvu_get_lf(rvu, block, pcifunc, actual_slot); + if (cptlf < 0) + return CPT_AF_ERR_LF_INVALID; + ctl = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf)); + ctl2 = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf)); + + ret = rvu_lf_reset(rvu, block, cptlf); + if (ret) + dev_err(rvu->dev, "Failed to reset blkaddr %d LF%d\n", + block->addr, cptlf); + + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), ctl); + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), ctl2); + + return 0; +} + static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req; -- GitLab From 8b1140c5808b0ad694984357676c0f569590bcec Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:51 +0530 Subject: [PATCH 1097/1418] octeontx2-af: optimize cpt pf identification [ Upstream commit 9adb04ff62f51265002c2c83e718bcf459e06e48 ] Optimize CPT PF identification in mbox handling for faster mbox response by doing it at AF driver probe instead of every mbox message. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 8 ++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 13 ++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index d88d86bf07b03..8f5b7d14e3f7c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1164,8 +1164,16 @@ cpt: goto nix_err; } + err = rvu_cpt_init(rvu); + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize cpt\n", __func__); + goto mcs_err; + } + return 0; +mcs_err: + rvu_mcs_exit(rvu); nix_err: rvu_nix_freemem(rvu); npa_err: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 0b76dfa979d4e..e1760f9298b17 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -506,6 +506,7 @@ struct rvu { struct ptp *ptp; int mcs_blk_cnt; + int cpt_pf_num; #ifdef CONFIG_DEBUG_FS struct rvu_debugfs rvu_dbg; @@ -872,6 +873,7 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu); int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot); int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); +int rvu_cpt_init(struct rvu *rvu); #define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) #define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 1cd34914cb86b..923af460db296 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -340,7 +340,7 @@ static int get_cpt_pf_num(struct rvu *rvu) static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; if (rvu_get_pf(pcifunc) != cpt_pf_num) return false; @@ -352,7 +352,7 @@ static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) static bool is_cpt_vf(struct rvu *rvu, u16 pcifunc) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; if (rvu_get_pf(pcifunc) != cpt_pf_num) return false; @@ -1023,7 +1023,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int s static int cpt_inline_inb_lf_cmd_send(struct rvu *rvu, int blkaddr, int nix_blkaddr) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; struct cpt_inst_lmtst_req *req; dma_addr_t res_daddr; int timeout = 3000; @@ -1167,3 +1167,10 @@ unlock: return 0; } + +int rvu_cpt_init(struct rvu *rvu) +{ + /* Retrieve CPT PF number */ + rvu->cpt_pf_num = get_cpt_pf_num(rvu); + return 0; +} -- GitLab From 8a231bd4d6cb774e3b903ec81fab9b6ec08039b1 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:54 +0530 Subject: [PATCH 1098/1418] octeontx2-af: add mbox to return CPT_AF_FLT_INT info [ Upstream commit 8299ffe3dc3dc9ac2bd60e3a8332008f03156aca ] CPT HW would trigger the CPT AF FLT interrupt when CPT engines hits some uncorrectable errors and AF is the one which receives the interrupt and recovers the engines. This patch adds a mailbox for CPT VFs to request for CPT faulted and recovered engines info. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 17 +++++++++ .../net/ethernet/marvell/octeontx2/af/rvu.h | 4 +++ .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 35 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 5decd1919de03..bbb6658420f1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -197,6 +197,8 @@ M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ msg_rsp) \ M(CPT_CTX_CACHE_SYNC, 0xA07, cpt_ctx_cache_sync, msg_req, msg_rsp) \ M(CPT_LF_RESET, 0xA08, cpt_lf_reset, cpt_lf_rst_req, msg_rsp) \ +M(CPT_FLT_ENG_INFO, 0xA09, cpt_flt_eng_info, cpt_flt_eng_info_req, \ + cpt_flt_eng_info_rsp) \ /* SDP mbox IDs (range 0x1000 - 0x11FF) */ \ M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \ M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \ @@ -1710,6 +1712,21 @@ struct cpt_lf_rst_req { u32 rsvd; }; +/* Mailbox message format to request for CPT faulted engines */ +struct cpt_flt_eng_info_req { + struct mbox_msghdr hdr; + int blkaddr; + bool reset; + u32 rsvd; +}; + +struct cpt_flt_eng_info_rsp { + struct mbox_msghdr hdr; + u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU]; + u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU]; + u64 rsvd; +}; + struct sdp_node_info { /* Node to which this PF belons to */ u8 node_id; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index e1760f9298b17..6a39006c334d7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -109,6 +109,8 @@ struct rvu_block { u64 lfreset_reg; unsigned char name[NAME_SIZE]; struct rvu *rvu; + u64 cpt_flt_eng_map[3]; + u64 cpt_rcvrd_eng_map[3]; }; struct nix_mcast { @@ -521,6 +523,8 @@ struct rvu { struct list_head mcs_intrq_head; /* mcs interrupt queue lock */ spinlock_t mcs_intrq_lock; + /* CPT interrupt lock */ + spinlock_t cpt_intr_lock; }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 923af460db296..6fb02b93c1718 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -70,6 +70,14 @@ static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), grp); rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val | 1ULL); + + spin_lock(&rvu->cpt_intr_lock); + block->cpt_flt_eng_map[vec] |= BIT_ULL(i); + val = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_STS(eng)); + val = val & 0x3; + if (val == 0x1 || val == 0x2) + block->cpt_rcvrd_eng_map[vec] |= BIT_ULL(i); + spin_unlock(&rvu->cpt_intr_lock); } rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(vec), reg); @@ -884,6 +892,31 @@ int rvu_mbox_handler_cpt_lf_reset(struct rvu *rvu, struct cpt_lf_rst_req *req, return 0; } +int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_req *req, + struct cpt_flt_eng_info_rsp *rsp) +{ + struct rvu_block *block; + unsigned long flags; + int blkaddr, vec; + + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; + + block = &rvu->hw->block[blkaddr]; + for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) { + spin_lock_irqsave(&rvu->cpt_intr_lock, flags); + rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec]; + rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec]; + if (req->reset) { + block->cpt_flt_eng_map[vec] = 0x0; + block->cpt_rcvrd_eng_map[vec] = 0x0; + } + spin_unlock_irqrestore(&rvu->cpt_intr_lock, flags); + } + return 0; +} + static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req; @@ -1172,5 +1205,7 @@ int rvu_cpt_init(struct rvu *rvu) { /* Retrieve CPT PF number */ rvu->cpt_pf_num = get_cpt_pf_num(rvu); + spin_lock_init(&rvu->cpt_intr_lock); + return 0; } -- GitLab From a64cc7599ecec984997b5b3b3dfd2e3c75493c54 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:54 +0530 Subject: [PATCH 1099/1418] octeontx2: Detect the mbox up or down message via register [ Upstream commit a88e0f936ba9a301c78f6eacfd38737d003c130b ] A single line of interrupt is used to receive up notifications and down reply messages from AF to PF (similarly from PF to its VF). PF acts as bridge and forwards VF messages to AF and sends respsones back from AF to VF. When an async event like link event is received by up message when PF is in middle of forwarding VF message then mailbox errors occur because PF state machine is corrupted. Since VF is a separate driver or VF driver can be in a VM it is not possible to serialize from the start of communication at VF. Hence to differentiate between type of messages at PF this patch makes sender to set mbox data register with distinct values for up and down messages. Sender also checks whether previous interrupt is received before triggering current interrupt by waiting for mailbox data register to become zero. Fixes: 5a6d7c9daef3 ("octeontx2-pf: Mailbox communication with AF") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mbox.c | 43 ++++++- .../net/ethernet/marvell/octeontx2/af/mbox.h | 6 + .../marvell/octeontx2/af/mcs_rvu_if.c | 17 ++- .../net/ethernet/marvell/octeontx2/af/rvu.c | 14 ++- .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 20 ++-- .../marvell/octeontx2/nic/otx2_common.h | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 113 ++++++++++++------ .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 71 ++++++----- 9 files changed, 205 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 9690ac01f02c8..7d741e3ba8c51 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -214,11 +214,12 @@ int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid) } EXPORT_SYMBOL(otx2_mbox_busy_poll_for_rsp); -void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) +static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) { struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; void *hw_mbase = mdev->hwbase; + u64 intr_val; tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; @@ -254,14 +255,52 @@ void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) spin_unlock(&mdev->mbox_lock); + /* Check if interrupt pending */ + intr_val = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + intr_val |= data; /* The interrupt should be fired after num_msgs is written * to the shared memory */ - writeq(1, (void __iomem *)mbox->reg_base + + writeq(intr_val, (void __iomem *)mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift))); } + +void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) +{ + otx2_mbox_msg_send_data(mbox, devid, MBOX_DOWN_MSG); +} EXPORT_SYMBOL(otx2_mbox_msg_send); +void otx2_mbox_msg_send_up(struct otx2_mbox *mbox, int devid) +{ + otx2_mbox_msg_send_data(mbox, devid, MBOX_UP_MSG); +} +EXPORT_SYMBOL(otx2_mbox_msg_send_up); + +bool otx2_mbox_wait_for_zero(struct otx2_mbox *mbox, int devid) +{ + u64 data; + + data = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + /* If data is non-zero wait for ~1ms and return to caller + * whether data has changed to zero or not after the wait. + */ + if (!data) + return true; + + usleep_range(950, 1000); + + data = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + return data == 0; +} +EXPORT_SYMBOL(otx2_mbox_wait_for_zero); + struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid, int size, int size_rsp) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index bbb6658420f1d..be70269e91684 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -16,6 +16,9 @@ #define MBOX_SIZE SZ_64K +#define MBOX_DOWN_MSG 1 +#define MBOX_UP_MSG 2 + /* AF/PF: PF initiated, PF/VF VF initiated */ #define MBOX_DOWN_RX_START 0 #define MBOX_DOWN_RX_SIZE (46 * SZ_1K) @@ -101,6 +104,7 @@ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void __force **hwbase, struct pci_dev *pdev, void __force *reg_base, int direction, int ndevs, unsigned long *bmap); void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid); +void otx2_mbox_msg_send_up(struct otx2_mbox *mbox, int devid); int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid); int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid); struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid, @@ -118,6 +122,8 @@ static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox, return otx2_mbox_alloc_msg_rsp(mbox, devid, size, 0); } +bool otx2_mbox_wait_for_zero(struct otx2_mbox *mbox, int devid); + /* Mailbox message types */ #define MBOX_MSG_MASK 0xFFFF #define MBOX_MSG_INVALID 0xFFFE diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index dfd23580e3b8e..d39d86e694ccf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -121,13 +121,17 @@ int mcs_add_intr_wq_entry(struct mcs *mcs, struct mcs_intr_event *event) static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) { struct mcs_intr_info *req; - int err, pf; + int pf; pf = rvu_get_pf(event->pcifunc); + mutex_lock(&rvu->mbox_lock); + req = otx2_mbox_alloc_msg_mcs_intr_notify(rvu, pf); - if (!req) + if (!req) { + mutex_unlock(&rvu->mbox_lock); return -ENOMEM; + } req->mcs_id = event->mcs_id; req->intr_mask = event->intr_mask; @@ -135,10 +139,11 @@ static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) req->hdr.pcifunc = event->pcifunc; req->lmac_id = event->lmac_id; - otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pf); - err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); - if (err) - dev_warn(rvu->dev, "MCS notification to pf %d failed\n", pf); + otx2_mbox_wait_for_zero(&rvu->afpf_wq_info.mbox_up, pf); + + otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + + mutex_unlock(&rvu->mbox_lock); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 8f5b7d14e3f7c..59e6442ddf4a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2114,7 +2114,7 @@ bad_message: } } -static void __rvu_mbox_handler(struct rvu_work *mwork, int type) +static void __rvu_mbox_handler(struct rvu_work *mwork, int type, bool poll) { struct rvu *rvu = mwork->rvu; int offset, err, id, devid; @@ -2181,6 +2181,9 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type) } mw->mbox_wrk[devid].num_msgs = 0; + if (poll) + otx2_mbox_wait_for_zero(mbox, devid); + /* Send mbox responses to VF/PF */ otx2_mbox_msg_send(mbox, devid); } @@ -2188,15 +2191,18 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type) static inline void rvu_afpf_mbox_handler(struct work_struct *work) { struct rvu_work *mwork = container_of(work, struct rvu_work, work); + struct rvu *rvu = mwork->rvu; - __rvu_mbox_handler(mwork, TYPE_AFPF); + mutex_lock(&rvu->mbox_lock); + __rvu_mbox_handler(mwork, TYPE_AFPF, true); + mutex_unlock(&rvu->mbox_lock); } static inline void rvu_afvf_mbox_handler(struct work_struct *work) { struct rvu_work *mwork = container_of(work, struct rvu_work, work); - __rvu_mbox_handler(mwork, TYPE_AFVF); + __rvu_mbox_handler(mwork, TYPE_AFVF, false); } static void __rvu_mbox_up_handler(struct rvu_work *mwork, int type) @@ -2371,6 +2377,8 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, } } + mutex_init(&rvu->mbox_lock); + mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL); if (!mbox_regions) { err = -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 6a39006c334d7..a3ae21398ca74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -525,6 +525,8 @@ struct rvu { spinlock_t mcs_intrq_lock; /* CPT interrupt lock */ spinlock_t cpt_intr_lock; + + struct mutex mbox_lock; /* Serialize mbox up and down msgs */ }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index bcb4385d0621c..d1e6b12ecfa70 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -232,7 +232,7 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) struct cgx_link_user_info *linfo; struct cgx_link_info_msg *msg; unsigned long pfmap; - int err, pfid; + int pfid; linfo = &event->link_uinfo; pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); @@ -250,16 +250,22 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) continue; } + mutex_lock(&rvu->mbox_lock); + /* Send mbox message to PF */ msg = otx2_mbox_alloc_msg_cgx_link_event(rvu, pfid); - if (!msg) + if (!msg) { + mutex_unlock(&rvu->mbox_lock); continue; + } + msg->link_info = *linfo; - otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pfid); - err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid); - if (err) - dev_warn(rvu->dev, "notification to pf %d failed\n", - pfid); + + otx2_mbox_wait_for_zero(&rvu->afpf_wq_info.mbox_up, pfid); + + otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pfid); + + mutex_unlock(&rvu->mbox_lock); } while (pfmap); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 44950c2542bb7..c15d1864a6371 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -785,7 +785,7 @@ static inline int otx2_sync_mbox_up_msg(struct mbox *mbox, int devid) if (!otx2_mbox_nonempty(&mbox->mbox_up, devid)) return 0; - otx2_mbox_msg_send(&mbox->mbox_up, devid); + otx2_mbox_msg_send_up(&mbox->mbox_up, devid); err = otx2_mbox_wait_for_rsp(&mbox->mbox_up, devid); if (err) return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a2d8ac6204054..05ee55022b92c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -292,8 +292,8 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) return 0; } -static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, - int first, int mdevs, u64 intr, int type) +static void otx2_queue_vf_work(struct mbox *mw, struct workqueue_struct *mbox_wq, + int first, int mdevs, u64 intr) { struct otx2_mbox_dev *mdev; struct otx2_mbox *mbox; @@ -307,40 +307,26 @@ static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, mbox = &mw->mbox; mdev = &mbox->dev[i]; - if (type == TYPE_PFAF) - otx2_sync_mbox_bbuf(mbox, i); hdr = mdev->mbase + mbox->rx_start; /* The hdr->num_msgs is set to zero immediately in the interrupt - * handler to ensure that it holds a correct value next time - * when the interrupt handler is called. - * pf->mbox.num_msgs holds the data for use in pfaf_mbox_handler - * pf>mbox.up_num_msgs holds the data for use in - * pfaf_mbox_up_handler. + * handler to ensure that it holds a correct value next time + * when the interrupt handler is called. pf->mw[i].num_msgs + * holds the data for use in otx2_pfvf_mbox_handler and + * pf->mw[i].up_num_msgs holds the data for use in + * otx2_pfvf_mbox_up_handler. */ if (hdr->num_msgs) { mw[i].num_msgs = hdr->num_msgs; hdr->num_msgs = 0; - if (type == TYPE_PFAF) - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), - sizeof(u64))); - queue_work(mbox_wq, &mw[i].mbox_wrk); } mbox = &mw->mbox_up; mdev = &mbox->dev[i]; - if (type == TYPE_PFAF) - otx2_sync_mbox_bbuf(mbox, i); hdr = mdev->mbase + mbox->rx_start; if (hdr->num_msgs) { mw[i].up_num_msgs = hdr->num_msgs; hdr->num_msgs = 0; - if (type == TYPE_PFAF) - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), - sizeof(u64))); - queue_work(mbox_wq, &mw[i].mbox_up_wrk); } } @@ -356,8 +342,10 @@ static void otx2_forward_msg_pfvf(struct otx2_mbox_dev *mdev, /* Msgs are already copied, trigger VF's mbox irq */ smp_wmb(); + otx2_mbox_wait_for_zero(pfvf_mbox, devid); + offset = pfvf_mbox->trigger | (devid << pfvf_mbox->tr_shift); - writeq(1, (void __iomem *)pfvf_mbox->reg_base + offset); + writeq(MBOX_DOWN_MSG, (void __iomem *)pfvf_mbox->reg_base + offset); /* Restore VF's mbox bounce buffer region address */ src_mdev->mbase = bbuf_base; @@ -547,7 +535,7 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) end: offset = mbox->rx_start + msg->next_msgoff; if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1)) - __otx2_mbox_reset(mbox, 0); + __otx2_mbox_reset(mbox, vf_idx); mdev->msgs_acked++; } } @@ -564,8 +552,7 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) if (vfs > 64) { intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(1)); otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr); - otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr, - TYPE_PFVF); + otx2_queue_vf_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr); if (intr) trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); vfs = 64; @@ -574,7 +561,7 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0)); otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(0), intr); - otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); + otx2_queue_vf_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr); if (intr) trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); @@ -822,20 +809,22 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) struct mbox *af_mbox; struct otx2_nic *pf; int offset, id; + u16 num_msgs; af_mbox = container_of(work, struct mbox, mbox_wrk); mbox = &af_mbox->mbox; mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + num_msgs = rsp_hdr->num_msgs; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); pf = af_mbox->pfvf; - for (id = 0; id < af_mbox->num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); offset = mbox->rx_start + msg->next_msgoff; - if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + if (mdev->msgs_acked == (num_msgs - 1)) __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } @@ -946,12 +935,14 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) int offset, id, devid = 0; struct mbox_hdr *rsp_hdr; struct mbox_msghdr *msg; + u16 num_msgs; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + num_msgs = rsp_hdr->num_msgs; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < af_mbox->up_num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); devid = msg->pcifunc & RVU_PFVF_FUNC_MASK; @@ -960,10 +951,11 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) otx2_process_mbox_msg_up(pf, msg); offset = mbox->rx_start + msg->next_msgoff; } - if (devid) { + /* Forward to VF iff VFs are really present */ + if (devid && pci_num_vf(pf->pdev)) { otx2_forward_vf_mbox_msgs(pf, &pf->mbox.mbox_up, MBOX_DIR_PFVF_UP, devid - 1, - af_mbox->up_num_msgs); + num_msgs); return; } @@ -973,16 +965,49 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) { struct otx2_nic *pf = (struct otx2_nic *)pf_irq; - struct mbox *mbox; + struct mbox *mw = &pf->mbox; + struct otx2_mbox_dev *mdev; + struct otx2_mbox *mbox; + struct mbox_hdr *hdr; + u64 mbox_data; /* Clear the IRQ */ otx2_write64(pf, RVU_PF_INT, BIT_ULL(0)); - mbox = &pf->mbox; - trace_otx2_msg_interrupt(mbox->mbox.pdev, "AF to PF", BIT_ULL(0)); + mbox_data = otx2_read64(pf, RVU_PF_PFAF_MBOX0); + + if (mbox_data & MBOX_UP_MSG) { + mbox_data &= ~MBOX_UP_MSG; + otx2_write64(pf, RVU_PF_PFAF_MBOX0, mbox_data); + + mbox = &mw->mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(pf->mbox_wq, &mw->mbox_up_wrk); + + trace_otx2_msg_interrupt(pf->pdev, "UP message from AF to PF", + BIT_ULL(0)); + } + + if (mbox_data & MBOX_DOWN_MSG) { + mbox_data &= ~MBOX_DOWN_MSG; + otx2_write64(pf, RVU_PF_PFAF_MBOX0, mbox_data); + + mbox = &mw->mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(pf->mbox_wq, &mw->mbox_wrk); - otx2_queue_work(mbox, pf->mbox_wq, 0, 1, 1, TYPE_PFAF); + trace_otx2_msg_interrupt(pf->pdev, "DOWN reply from AF to PF", + BIT_ULL(0)); + } return IRQ_HANDLED; } @@ -3030,6 +3055,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) struct otx2_vf_config *config; struct cgx_link_info_msg *req; struct mbox_msghdr *msghdr; + struct delayed_work *dwork; struct otx2_nic *pf; int vf_idx; @@ -3038,10 +3064,21 @@ static void otx2_vf_link_event_task(struct work_struct *work) vf_idx = config - config->pf->vf_configs; pf = config->pf; + mutex_lock(&pf->mbox.lock); + + dwork = &config->link_event_work; + + if (!otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx)) { + schedule_delayed_work(dwork, msecs_to_jiffies(100)); + mutex_unlock(&pf->mbox.lock); + return; + } + msghdr = otx2_mbox_alloc_msg_rsp(&pf->mbox_pfvf[0].mbox_up, vf_idx, sizeof(*req), sizeof(struct msg_rsp)); if (!msghdr) { dev_err(pf->dev, "Failed to create VF%d link event\n", vf_idx); + mutex_unlock(&pf->mbox.lock); return; } @@ -3050,7 +3087,11 @@ static void otx2_vf_link_event_task(struct work_struct *work) req->hdr.sig = OTX2_MBOX_REQ_SIG; memcpy(&req->link_info, &pf->linfo, sizeof(req->link_info)); - otx2_sync_mbox_up_msg(&pf->mbox_pfvf[0], vf_idx); + otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); + + otx2_mbox_msg_send_up(&pf->mbox_pfvf[0].mbox_up, vf_idx); + + mutex_unlock(&pf->mbox.lock); } static int otx2_sriov_enable(struct pci_dev *pdev, int numvfs) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 404855bccb4b6..68fef947ccced 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -89,16 +89,20 @@ static void otx2vf_vfaf_mbox_handler(struct work_struct *work) struct otx2_mbox *mbox; struct mbox *af_mbox; int offset, id; + u16 num_msgs; af_mbox = container_of(work, struct mbox, mbox_wrk); mbox = &af_mbox->mbox; mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (af_mbox->num_msgs == 0) + num_msgs = rsp_hdr->num_msgs; + + if (num_msgs == 0) return; + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < af_mbox->num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg); offset = mbox->rx_start + msg->next_msgoff; @@ -151,6 +155,7 @@ static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) struct mbox *vf_mbox; struct otx2_nic *vf; int offset, id; + u16 num_msgs; vf_mbox = container_of(work, struct mbox, mbox_up_wrk); vf = vf_mbox->pfvf; @@ -158,12 +163,14 @@ static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (vf_mbox->up_num_msgs == 0) + num_msgs = rsp_hdr->num_msgs; + + if (num_msgs == 0) return; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < vf_mbox->up_num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_mbox_msg_up(vf, msg); offset = mbox->rx_start + msg->next_msgoff; @@ -178,40 +185,48 @@ static irqreturn_t otx2vf_vfaf_mbox_intr_handler(int irq, void *vf_irq) struct otx2_mbox_dev *mdev; struct otx2_mbox *mbox; struct mbox_hdr *hdr; + u64 mbox_data; /* Clear the IRQ */ otx2_write64(vf, RVU_VF_INT, BIT_ULL(0)); + mbox_data = otx2_read64(vf, RVU_VF_VFPF_MBOX0); + /* Read latest mbox data */ smp_rmb(); - /* Check for PF => VF response messages */ - mbox = &vf->mbox.mbox; - mdev = &mbox->dev[0]; - otx2_sync_mbox_bbuf(mbox, 0); + if (mbox_data & MBOX_DOWN_MSG) { + mbox_data &= ~MBOX_DOWN_MSG; + otx2_write64(vf, RVU_VF_VFPF_MBOX0, mbox_data); + + /* Check for PF => VF response messages */ + mbox = &vf->mbox.mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); - trace_otx2_msg_interrupt(mbox->pdev, "PF to VF", BIT_ULL(0)); + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); - hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (hdr->num_msgs) { - vf->mbox.num_msgs = hdr->num_msgs; - hdr->num_msgs = 0; - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); - queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); + trace_otx2_msg_interrupt(mbox->pdev, "DOWN reply from PF to VF", + BIT_ULL(0)); } - /* Check for PF => VF notification messages */ - mbox = &vf->mbox.mbox_up; - mdev = &mbox->dev[0]; - otx2_sync_mbox_bbuf(mbox, 0); - - hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (hdr->num_msgs) { - vf->mbox.up_num_msgs = hdr->num_msgs; - hdr->num_msgs = 0; - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); - queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + + if (mbox_data & MBOX_UP_MSG) { + mbox_data &= ~MBOX_UP_MSG; + otx2_write64(vf, RVU_VF_VFPF_MBOX0, mbox_data); + + /* Check for PF => VF notification messages */ + mbox = &vf->mbox.mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + + trace_otx2_msg_interrupt(mbox->pdev, "UP message from PF to VF", + BIT_ULL(0)); } return IRQ_HANDLED; -- GitLab From e545e4b1c1c1c331a1f8e4e56e8cc1d2a82bd625 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 May 2023 13:52:28 -1000 Subject: [PATCH 1100/1418] net: octeontx2: Use alloc_ordered_workqueue() to create ordered workqueues [ Upstream commit 289f97467480266f9bd8cac7f1e05a478d523f79 ] BACKGROUND ========== When multiple work items are queued to a workqueue, their execution order doesn't match the queueing order. They may get executed in any order and simultaneously. When fully serialized execution - one by one in the queueing order - is needed, an ordered workqueue should be used which can be created with alloc_ordered_workqueue(). However, alloc_ordered_workqueue() was a later addition. Before it, an ordered workqueue could be obtained by creating an UNBOUND workqueue with @max_active==1. This originally was an implementation side-effect which was broken by 4c16bd327c74 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered"). Because there were users that depended on the ordered execution, 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") made workqueue allocation path to implicitly promote UNBOUND workqueues w/ @max_active==1 to ordered workqueues. While this has worked okay, overloading the UNBOUND allocation interface this way creates other issues. It's difficult to tell whether a given workqueue actually needs to be ordered and users that legitimately want a min concurrency level wq unexpectedly gets an ordered one instead. With planned UNBOUND workqueue updates to improve execution locality and more prevalence of chiplet designs which can benefit from such improvements, this isn't a state we wanna be in forever. This patch series audits all callsites that create an UNBOUND workqueue w/ @max_active==1 and converts them to alloc_ordered_workqueue() as necessary. WHAT TO LOOK FOR ================ The conversions are from alloc_workqueue(WQ_UNBOUND | flags, 1, args..) to alloc_ordered_workqueue(flags, args...) which don't cause any functional changes. If you know that fully ordered execution is not ncessary, please let me know. I'll drop the conversion and instead add a comment noting the fact to reduce confusion while conversion is in progress. If you aren't fully sure, it's completely fine to let the conversion through. The behavior will stay exactly the same and we can always reconsider later. As there are follow-up workqueue core changes, I'd really appreciate if the patch can be routed through the workqueue tree w/ your acks. Thanks. Signed-off-by: Tejun Heo Reviewed-by: Sunil Goutham Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Ratheesh Kannoth Cc: Srujana Challa Cc: Geetha sowjanya Cc: netdev@vger.kernel.org Stable-dep-of: 7558ce0d974c ("octeontx2-pf: Use default max_active works instead of one") Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 5 ++--- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 13 +++++-------- .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 5 ++--- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 59e6442ddf4a4..a7965b457bee9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -3055,9 +3055,8 @@ static int rvu_flr_init(struct rvu *rvu) cfg | BIT_ULL(22)); } - rvu->flr_wq = alloc_workqueue("rvu_afpf_flr", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - 1); + rvu->flr_wq = alloc_ordered_workqueue("rvu_afpf_flr", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!rvu->flr_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 05ee55022b92c..3f044b161e8bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -272,8 +272,7 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) { int vf; - pf->flr_wq = alloc_workqueue("otx2_pf_flr_wq", - WQ_UNBOUND | WQ_HIGHPRI, 1); + pf->flr_wq = alloc_ordered_workqueue("otx2_pf_flr_wq", WQ_HIGHPRI); if (!pf->flr_wq) return -ENOMEM; @@ -584,9 +583,8 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf) return -ENOMEM; - pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_pfvf_wq = alloc_ordered_workqueue("otx2_pfvf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -1088,9 +1086,8 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) int err; mbox->pfvf = pf; - pf->mbox_wq = alloc_workqueue("otx2_pfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_wq = alloc_ordered_workqueue("otx2_pfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 68fef947ccced..dcb8190de2407 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -308,9 +308,8 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) int err; mbox->pfvf = vf; - vf->mbox_wq = alloc_workqueue("otx2_vfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + vf->mbox_wq = alloc_ordered_workqueue("otx2_vfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!vf->mbox_wq) return -ENOMEM; -- GitLab From 53ae0f36690ca6a29f932c74721aa63c617fe692 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:56 +0530 Subject: [PATCH 1101/1418] octeontx2-pf: Use default max_active works instead of one [ Upstream commit 7558ce0d974ced1dc07edc1197f750fe28c52e57 ] Only one execution context for the workqueue used for PF and VFs mailbox communication is incorrect since multiple works are queued simultaneously by all the VFs and PF link UP messages. Hence use default number of execution contexts by passing zero as max_active to alloc_workqueue function. With this fix in place, modify UP messages also to wait until completion. Fixes: d424b6c02415 ("octeontx2-pf: Enable SRIOV and added VF mbox handling") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 3f044b161e8bf..a6c5f6a2dab07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -583,8 +583,9 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf) return -ENOMEM; - pf->mbox_pfvf_wq = alloc_ordered_workqueue("otx2_pfvf_mailbox", - WQ_HIGHPRI | WQ_MEM_RECLAIM); + pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 0); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -3086,7 +3087,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); - otx2_mbox_msg_send_up(&pf->mbox_pfvf[0].mbox_up, vf_idx); + otx2_sync_mbox_up_msg(&pf->mbox_pfvf[0], vf_idx); mutex_unlock(&pf->mbox.lock); } -- GitLab From 53e6709a2ff1a140549a3662a0c5ebf5c9c8d724 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:57 +0530 Subject: [PATCH 1102/1418] octeontx2-pf: Send UP messages to VF only when VF is up. [ Upstream commit dfcf6355f53b1796cf7fd50a4f27b18ee6a3497a ] When PF sending link status messages to VF, it is possible that by the time link_event_task work function is executed VF might have brought down. Hence before sending VF link status message check whether VF is up to receive it. Fixes: ad513ed938c9 ("octeontx2-vf: Link event notification support") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a6c5f6a2dab07..7e2c30927c312 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3062,6 +3062,9 @@ static void otx2_vf_link_event_task(struct work_struct *work) vf_idx = config - config->pf->vf_configs; pf = config->pf; + if (config->intf_down) + return; + mutex_lock(&pf->mbox.lock); dwork = &config->link_event_work; -- GitLab From 29d2550d79a8cbd31e0fbaa5c0e2a2efdc444e44 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:58 +0530 Subject: [PATCH 1103/1418] octeontx2-af: Use separate handlers for interrupts [ Upstream commit 50e60de381c342008c0956fd762e1c26408f372c ] For PF to AF interrupt vector and VF to AF vector same interrupt handler is registered which is causing race condition. When two interrupts are raised to two CPUs at same time then two cores serve same event corrupting the data. Fixes: 7304ac4567bc ("octeontx2-af: Add mailbox IRQ and msg handlers") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index a7965b457bee9..a7034b47ed6c9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2528,10 +2528,9 @@ static void rvu_queue_work(struct mbox_wq_info *mw, int first, } } -static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) +static irqreturn_t rvu_mbox_pf_intr_handler(int irq, void *rvu_irq) { struct rvu *rvu = (struct rvu *)rvu_irq; - int vfs = rvu->vfs; u64 intr; intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT); @@ -2545,6 +2544,18 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) rvu_queue_work(&rvu->afpf_wq_info, 0, rvu->hw->total_pfs, intr); + return IRQ_HANDLED; +} + +static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) +{ + struct rvu *rvu = (struct rvu *)rvu_irq; + int vfs = rvu->vfs; + u64 intr; + + /* Sync with mbox memory region */ + rmb(); + /* Handle VF interrupts */ if (vfs > 64) { intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(1)); @@ -2881,7 +2892,7 @@ static int rvu_register_interrupts(struct rvu *rvu) /* Register mailbox interrupt handler */ sprintf(&rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], "RVUAF Mbox"); ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_MBOX), - rvu_mbox_intr_handler, 0, + rvu_mbox_pf_intr_handler, 0, &rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], rvu); if (ret) { dev_err(rvu->dev, -- GitLab From 5ad233dc731ab64cdc47b84a5c1f78fff6c024af Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Mar 2024 10:02:41 +0100 Subject: [PATCH 1104/1418] netfilter: nft_set_pipapo: release elements in clone only from destroy path [ Upstream commit b0e256f3dd2ba6532f37c5c22e07cb07a36031ee ] Clone already always provides a current view of the lookup table, use it to destroy the set, otherwise it is possible to destroy elements twice. This fix requires: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") which came after: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path"). Fixes: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index e1969209b3abb..58eca26162735 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2240,8 +2240,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(ctx, set, m); - for_each_possible_cpu(cpu) pipapo_free_scratch(m, cpu); free_percpu(m->scratch); @@ -2253,8 +2251,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->clone) { m = priv->clone; - if (priv->dirty) - nft_set_pipapo_match_destroy(ctx, set, m); + nft_set_pipapo_match_destroy(ctx, set, m); for_each_possible_cpu(cpu) pipapo_free_scratch(priv->clone, cpu); -- GitLab From 9683cb6c2c6c0f45537bf0b8868b5d38fcb63fc7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2024 18:51:38 +0100 Subject: [PATCH 1105/1418] netfilter: nf_tables: do not compare internal table flags on updates [ Upstream commit 4a0e7f2decbf9bd72461226f1f5f7dcc4b08f139 ] Restore skipping transaction if table update does not modify flags. Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d3ba947f43761..0a86c019a75de 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1205,7 +1205,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_MASK) return -EOPNOTSUPP; - if (flags == ctx->table->flags) + if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) return 0; if ((nft_table_has_owner(ctx->table) && -- GitLab From f21ddce5b8c4caab6375ff7d612ff15e19f6d270 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 19 Mar 2024 13:44:34 -0700 Subject: [PATCH 1106/1418] rcu: add a helper to report consolidated flavor QS [ Upstream commit 1a77557d48cff187a169c2aec01c0dd78a5e7e50 ] When under heavy load, network processing can run CPU-bound for many tens of seconds. Even in preemptible kernels (non-RT kernel), this can block RCU Tasks grace periods, which can cause trace-event removal to take more than a minute, which is unacceptably long. This commit therefore creates a new helper function that passes through both RCU and RCU-Tasks quiescent states every 100 milliseconds. This hard-coded value suffices for current workloads. Suggested-by: Paul E. McKenney Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Yan Zhai Reviewed-by: Paul E. McKenney Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/90431d46ee112d2b0af04dbfe936faaca11810a5.1710877680.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski Stable-dep-of: d6dbbb11247c ("net: report RCU QS on threaded NAPI repolling") Signed-off-by: Sasha Levin --- include/linux/rcupdate.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d2507168b9c7b..319698087d66a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -268,6 +268,37 @@ do { \ cond_resched(); \ } while (0) +/** + * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states + * @old_ts: jiffies at start of processing. + * + * This helper is for long-running softirq handlers, such as NAPI threads in + * networking. The caller should initialize the variable passed in as @old_ts + * at the beginning of the softirq handler. When invoked frequently, this macro + * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will + * provide both RCU and RCU-Tasks quiescent states. Note that this macro + * modifies its old_ts argument. + * + * Because regions of code that have disabled softirq act as RCU read-side + * critical sections, this macro should be invoked with softirq (and + * preemption) enabled. + * + * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would + * have more chance to invoke schedule() calls and provide necessary quiescent + * states. As a contrast, calling cond_resched() only won't achieve the same + * effect because cond_resched() does not provide RCU-Tasks quiescent states. + */ +#define rcu_softirq_qs_periodic(old_ts) \ +do { \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ + time_after(jiffies, (old_ts) + HZ / 10)) { \ + preempt_disable(); \ + rcu_softirq_qs(); \ + preempt_enable(); \ + (old_ts) = jiffies; \ + } \ +} while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. -- GitLab From 3890e7008c553fb582f14e01b64e84bc37959093 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 19 Mar 2024 13:44:37 -0700 Subject: [PATCH 1107/1418] net: report RCU QS on threaded NAPI repolling [ Upstream commit d6dbbb11247c71203785a2c9da474c36f4b19eae ] NAPI threads can keep polling packets under load. Currently it is only calling cond_resched() before repolling, but it is not sufficient to clear out the holdout of RCU tasks, which prevent BPF tracing programs from detaching for long period. This can be reproduced easily with following set up: ip netns add test1 ip netns add test2 ip -n test1 link add veth1 type veth peer name veth2 netns test2 ip -n test1 link set veth1 up ip -n test1 link set lo up ip -n test2 link set veth2 up ip -n test2 link set lo up ip -n test1 addr add 192.168.1.2/31 dev veth1 ip -n test1 addr add 1.1.1.1/32 dev lo ip -n test2 addr add 192.168.1.3/31 dev veth2 ip -n test2 addr add 2.2.2.2/31 dev lo ip -n test1 route add default via 192.168.1.3 ip -n test2 route add default via 192.168.1.2 for i in `seq 10 210`; do for j in `seq 10 210`; do ip netns exec test2 iptables -I INPUT -s 3.3.$i.$j -p udp --dport 5201 done done ip netns exec test2 ethtool -K veth2 gro on ip netns exec test2 bash -c 'echo 1 > /sys/class/net/veth2/threaded' ip netns exec test1 ethtool -K veth1 tso off Then run an iperf3 client/server and a bpftrace script can trigger it: ip netns exec test2 iperf3 -s -B 2.2.2.2 >/dev/null& ip netns exec test1 iperf3 -c 2.2.2.2 -B 1.1.1.1 -u -l 1500 -b 3g -t 100 >/dev/null& bpftrace -e 'kfunc:__napi_poll{@=count();} interval:s:1{exit();}' Report RCU quiescent states periodically will resolve the issue. Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Yan Zhai Acked-by: Paul E. McKenney Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/4c3b0d3f32d3b18949d75b18e5e1d9f13a24f025.1710877680.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 9a48a7e26cf46..65284eeec7de5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6645,6 +6645,8 @@ static int napi_threaded_poll(void *data) void *have; while (!napi_thread_wait(napi)) { + unsigned long last_qs = jiffies; + for (;;) { bool repoll = false; @@ -6659,6 +6661,7 @@ static int napi_threaded_poll(void *data) if (!repoll) break; + rcu_softirq_qs_periodic(last_qs); cond_resched(); } } -- GitLab From 5ff8f56c392bf5e728f5630820b6a42299a1fe23 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 19 Mar 2024 13:44:40 -0700 Subject: [PATCH 1108/1418] bpf: report RCU QS in cpumap kthread [ Upstream commit 00bf63122459e87193ee7f1bc6161c83a525569f ] When there are heavy load, cpumap kernel threads can be busy polling packets from redirect queues and block out RCU tasks from reaching quiescent states. It is insufficient to just call cond_resched() in such context. Periodically raise a consolidated RCU QS before cond_resched fixes the problem. Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP") Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Yan Zhai Acked-by: Paul E. McKenney Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/c17b9f1517e19d813da3ede5ed33ee18496bb5d8.1710877680.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- kernel/bpf/cpumap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 0508937048137..806a7c1b364b6 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -306,6 +306,7 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; + unsigned long last_qs = jiffies; complete(&rcpu->kthread_running); set_current_state(TASK_INTERRUPTIBLE); @@ -331,10 +332,12 @@ static int cpu_map_kthread_run(void *data) if (__ptr_ring_empty(rcpu->queue)) { schedule(); sched = 1; + last_qs = jiffies; } else { __set_current_state(TASK_RUNNING); } } else { + rcu_softirq_qs_periodic(last_qs); sched = cond_resched(); } -- GitLab From f1fa919ea59655f73cb3972264e157b8831ba546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 12:33:41 +0300 Subject: [PATCH 1109/1418] net: dsa: mt7530: fix link-local frames that ingress vlan filtering ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e8bf353577f382c7066c661fed41b2adc0fc7c40 ] Whether VLAN-aware or not, on every VID VLAN table entry that has the CPU port as a member of it, frames are set to egress the CPU port with the VLAN tag stacked. This is so that VLAN tags can be appended after hardware special tag (called DSA tag in the context of Linux drivers). For user ports on a VLAN-unaware bridge, frame ingressing the user port egresses CPU port with only the special tag. For user ports on a VLAN-aware bridge, frame ingressing the user port egresses CPU port with the special tag and the VLAN tag. This causes issues with link-local frames, specifically BPDUs, because the software expects to receive them VLAN-untagged. There are two options to make link-local frames egress untagged. Setting CONSISTENT or UNTAGGED on the EG_TAG bits on the relevant register. CONSISTENT means frames egress exactly as they ingress. That means egressing with the VLAN tag they had at ingress or egressing untagged if they ingressed untagged. Although link-local frames are not supposed to be transmitted VLAN-tagged, if they are done so, when egressing through a CPU port, the special tag field will be broken. BPDU egresses CPU port with VLAN tag egressing stacked, received on software: 00:01:25.104821 AF Unknown (382365846), length 106: | STAG | | VLAN | 0x0000: 0000 6c27 614d 4143 0001 0000 8100 0001 ..l'aMAC........ 0x0010: 0026 4242 0300 0000 0000 0000 6c27 614d .&BB........l'aM 0x0020: 4143 0000 0000 0000 6c27 614d 4143 0000 AC......l'aMAC.. 0x0030: 0000 1400 0200 0f00 0000 0000 0000 0000 ................ BPDU egresses CPU port with VLAN tag egressing untagged, received on software: 00:23:56.628708 AF Unknown (25215488), length 64: | STAG | 0x0000: 0000 6c27 614d 4143 0001 0000 0026 4242 ..l'aMAC.....&BB 0x0010: 0300 0000 0000 0000 6c27 614d 4143 0000 ........l'aMAC.. 0x0020: 0000 0000 6c27 614d 4143 0000 0000 1400 ....l'aMAC...... 0x0030: 0200 0f00 0000 0000 0000 0000 ............ BPDU egresses CPU port with VLAN tag egressing tagged, received on software: 00:01:34.311963 AF Unknown (25215488), length 64: | Mess | 0x0000: 0000 6c27 614d 4143 0001 0001 0026 4242 ..l'aMAC.....&BB 0x0010: 0300 0000 0000 0000 6c27 614d 4143 0000 ........l'aMAC.. 0x0020: 0000 0000 6c27 614d 4143 0000 0000 1400 ....l'aMAC...... 0x0030: 0200 0f00 0000 0000 0000 0000 ............ To prevent confusing the software, force the frame to egress UNTAGGED instead of CONSISTENT. This way, frames can't possibly be received TAGGED by software which would have the special tag field broken. VLAN Tag Egress Procedure For all frames, one of these options set the earliest in this order will apply to the frame: - EG_TAG in certain registers for certain frames. This will apply to frame with matching MAC DA or EtherType. - EG_TAG in the address table. This will apply to frame at its incoming port. - EG_TAG in the PVC register. This will apply to frame at its incoming port. - EG_CON and [EG_TAG per port] in the VLAN table. This will apply to frame at its outgoing port. - EG_TAG in the PCR register. This will apply to frame at its outgoing port. EG_TAG in certain registers for certain frames: PPPoE Discovery_ARP/RARP: PPP_EG_TAG and ARP_EG_TAG in the APC register. IGMP_MLD: IGMP_EG_TAG and MLD_EG_TAG in the IMC register. BPDU and PAE: BPDU_EG_TAG and PAE_EG_TAG in the BPC register. REV_01 and REV_02: R01_EG_TAG and R02_EG_TAG in the RGAC1 register. REV_03 and REV_0E: R03_EG_TAG and R0E_EG_TAG in the RGAC2 register. REV_10 and REV_20: R10_EG_TAG and R20_EG_TAG in the RGAC3 register. REV_21 and REV_UN: R21_EG_TAG and RUN_EG_TAG in the RGAC4 register. With this change, it can be observed that a bridge interface with stp_state and vlan_filtering enabled will properly block ports now. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 23 +++++++++++++++-------- drivers/net/dsa/mt7530.h | 9 ++++++++- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 80b346d4d990f..86c410f9fef8c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1001,16 +1001,23 @@ unlock_exit: static void mt753x_trap_frames(struct mt7530_priv *priv) { - /* Trap BPDUs to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, + /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them + * VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_BPDU_CPU_ONLY); - /* Trap 802.1X PAE frames to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK, - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY)); - - /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK, + /* Trap LLDP frames with :0E MAC DA to the CPU port(s) and egress them + * VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK, + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); } diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 6202b0f8c3f34..a5b864fd7d60c 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -63,12 +63,18 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 -#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) #define MT753X_PAE_PORT_FW(x) FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x) +#define MT753X_BPDU_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_BPDU_EG_TAG(x) FIELD_PREP(MT753X_BPDU_EG_TAG_MASK, x) +#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) @@ -251,6 +257,7 @@ enum mt7530_port_mode { enum mt7530_vlan_port_eg_tag { MT7530_VLAN_EG_DISABLED = 0, MT7530_VLAN_EG_CONSISTENT = 1, + MT7530_VLAN_EG_UNTAGGED = 4, }; enum mt7530_vlan_port_attr { -- GitLab From 86c0c154a759f2af9612a04bdf29110f02dce956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 12:33:42 +0300 Subject: [PATCH 1110/1418] net: dsa: mt7530: fix handling of all link-local frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 69ddba9d170bdaee1dc0eb4ced38d7e4bb7b92af ] Currently, the MT753X switches treat frames with :01-0D and :0F MAC DAs as regular multicast frames, therefore flooding them to user ports. On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA must only be propagated to C-VLAN and MAC Bridge components. That means VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, these frames are supposed to be processed by the CPU (software). So we make the switch only forward them to the CPU port. And if received from a CPU port, forward to a single port. The software is responsible of making the switch conform to the latter by setting a single port as destination port on the special tag. This switch intellectual property cannot conform to this part of the standard fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC DAs, it also includes :22-FF which the scope of propagation is not supposed to be restricted for these MAC DAs. Set frames with :01-03 MAC DAs to be trapped to the CPU port(s). Add a comment for the remaining MAC DAs. Note that the ingress port must have a PVID assigned to it for the switch to forward untagged frames. A PVID is set by default on VLAN-aware and VLAN-unaware ports. However, when the network interface that pertains to the ingress port is attached to a vlan_filtering enabled bridge, the user can remove the PVID assignment from it which would prevent the link-local frames from being trapped to the CPU port. I am yet to see a way to forward link-local frames while preventing other untagged frames from being forwarded too. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 37 +++++++++++++++++++++++++++++++++---- drivers/net/dsa/mt7530.h | 13 +++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 86c410f9fef8c..07065c1af55e4 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -998,6 +998,21 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } +/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std + * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA + * must only be propagated to C-VLAN and MAC Bridge components. That means + * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, + * these frames are supposed to be processed by the CPU (software). So we make + * the switch only forward them to the CPU port. And if received from a CPU + * port, forward to a single port. The software is responsible of making the + * switch conform to the latter by setting a single port as destination port on + * the special tag. + * + * This switch intellectual property cannot conform to this part of the standard + * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC + * DAs, it also includes :22-FF which the scope of propagation is not supposed + * to be restricted for these MAC DAs. + */ static void mt753x_trap_frames(struct mt7530_priv *priv) { @@ -1012,13 +1027,27 @@ mt753x_trap_frames(struct mt7530_priv *priv) MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_BPDU_CPU_ONLY); - /* Trap LLDP frames with :0E MAC DA to the CPU port(s) and egress them - * VLAN-untagged. + /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress + * them VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | + MT753X_R01_PORT_FW_MASK, + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); + + /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress + * them VLAN-untagged. */ mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK, + MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | + MT753X_R03_PORT_FW_MASK, MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index a5b864fd7d60c..fa2afa67ceb07 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -71,12 +71,25 @@ enum mt753x_id { #define MT753X_BPDU_EG_TAG(x) FIELD_PREP(MT753X_BPDU_EG_TAG_MASK, x) #define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +/* Register for :01 and :02 MAC DA frame control */ +#define MT753X_RGAC1 0x28 +#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) +#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) +#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) +#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) + /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) +#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) enum mt753x_bpdu_port_fw { MT753X_BPDU_FOLLOW_MFC, -- GitLab From 766ec94cc57492eab97cbbf1595bd516ab0cb0e4 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Thu, 21 Mar 2024 15:08:57 +0800 Subject: [PATCH 1111/1418] spi: spi-mt65xx: Fix NULL pointer access in interrupt handler [ Upstream commit a20ad45008a7c82f1184dc6dee280096009ece55 ] The TX buffer in spi_transfer can be a NULL pointer, so the interrupt handler may end up writing to the invalid memory and cause crashes. Add a check to trans->tx_buf before using it. Fixes: 1ce24864bff4 ("spi: mediatek: Only do dma for 4-byte aligned buffers") Signed-off-by: Fei Shao Reviewed-by: AngeloGioacchino Del Regno Link: https://msgid.link/r/20240321070942.1587146-2-fshao@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-mt65xx.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6e95efb50acbc..f9ec8742917a6 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -787,17 +787,19 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len); mtk_spi_setup_packet(master); - cnt = mdata->xfer_len / 4; - iowrite32_rep(mdata->base + SPI_TX_DATA_REG, - trans->tx_buf + mdata->num_xfered, cnt); + if (trans->tx_buf) { + cnt = mdata->xfer_len / 4; + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, + trans->tx_buf + mdata->num_xfered, cnt); - remainder = mdata->xfer_len % 4; - if (remainder > 0) { - reg_val = 0; - memcpy(®_val, - trans->tx_buf + (cnt * 4) + mdata->num_xfered, - remainder); - writel(reg_val, mdata->base + SPI_TX_DATA_REG); + remainder = mdata->xfer_len % 4; + if (remainder > 0) { + reg_val = 0; + memcpy(®_val, + trans->tx_buf + (cnt * 4) + mdata->num_xfered, + remainder); + writel(reg_val, mdata->base + SPI_TX_DATA_REG); + } } mtk_spi_enable_transfer(master); -- GitLab From 1bf455b2c67cf1af8830e8e855fe8b822a3a0554 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 20 Mar 2024 08:57:17 +0200 Subject: [PATCH 1112/1418] selftests: forwarding: Fix ping failure due to short timeout [ Upstream commit e4137851d4863a9bdc6aabc613bcb46c06d91e64 ] The tests send 100 pings in 0.1 second intervals and force a timeout of 11 seconds, which is borderline (especially on debug kernels), resulting in random failures in netdev CI [1]. Fix by increasing the timeout to 20 seconds. It should not prolong the test unless something is wrong, in which case the test will rightfully fail. [1] # selftests: net/forwarding: vxlan_bridge_1d_port_8472_ipv6.sh # INFO: Running tests with UDP port 8472 # TEST: ping: local->local [ OK ] # TEST: ping: local->remote 1 [FAIL] # Ping failed [...] Fixes: b07e9957f220 ("selftests: forwarding: Add VxLAN tests with a VLAN-unaware bridge for IPv6") Fixes: 728b35259e28 ("selftests: forwarding: Add VxLAN tests with a VLAN-aware bridge for IPv6") Reported-by: Paolo Abeni Closes: https://lore.kernel.org/netdev/24a7051fdcd1f156c3704bca39e4b3c41dfc7c4b.camel@redhat.com/ Signed-off-by: Ido Schimmel Reviewed-by: Hangbin Liu Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240320065717.4145325-1-idosch@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh | 4 ++-- .../testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh index ac97f07e5ce82..bd3f7d492af2b 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -354,7 +354,7 @@ __ping_ipv4() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome ARP noise. - PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 @@ -410,7 +410,7 @@ __ping_ipv6() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome neighbor discovery noise. - PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 100 diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh index d880df89bc8bd..e83fde79f40d0 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh @@ -457,7 +457,7 @@ __ping_ipv4() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome ARP noise. - PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 @@ -522,7 +522,7 @@ __ping_ipv6() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome neighbor discovery noise. - PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 100 -- GitLab From d0980ed818d73788e4817a438aed8ddb56d98219 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 25 Jan 2023 23:31:55 +0100 Subject: [PATCH 1113/1418] dm: address indent/space issues [ Upstream commit 255e2646496fcbf836a3dfe1b535692f09f11b45 ] Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Stable-dep-of: b4d78cfeb304 ("dm-integrity: align the outgoing bio in integrity_recheck") Signed-off-by: Sasha Levin --- drivers/md/dm-cache-policy.h | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-integrity.c | 5 ++--- drivers/md/dm-log.c | 8 ++++---- drivers/md/dm-raid.c | 8 ++++---- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-table.c | 4 ++-- drivers/md/dm-thin.c | 6 +++--- drivers/md/dm-writecache.c | 2 +- drivers/md/persistent-data/dm-btree.c | 6 +++--- drivers/md/persistent-data/dm-space-map-common.c | 2 +- drivers/md/persistent-data/dm-space-map-common.h | 2 +- 12 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index 6ba3e9c91af53..8bc21d54884e9 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -75,7 +75,7 @@ struct dm_cache_policy { * background work. */ int (*get_background_work)(struct dm_cache_policy *p, bool idle, - struct policy_work **result); + struct policy_work **result); /* * You must pass in the same work pointer that you were given, not diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index e8c534b5870ac..25e51dc6e5598 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2535,7 +2535,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string type = &key_type_encrypted; set_key = set_key_encrypted; } else if (IS_ENABLED(CONFIG_TRUSTED_KEYS) && - !strncmp(key_string, "trusted:", key_desc - key_string + 1)) { + !strncmp(key_string, "trusted:", key_desc - key_string + 1)) { type = &key_type_trusted; set_key = set_key_trusted; } else { diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e1bf91faa462b..94382e43ea506 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2367,7 +2367,6 @@ offload_to_thread: else skip_check: dec_in_flight(dio); - } else { INIT_WORK(&dio->work, integrity_metadata); queue_work(ic->metadata_wq, &dio->work); @@ -4151,7 +4150,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { if (val < 1 << SECTOR_SHIFT || val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || - (val & (val -1))) { + (val & (val - 1))) { r = -EINVAL; ti->error = "Invalid block_size argument"; goto bad; @@ -4477,7 +4476,7 @@ try_smaller_buffer: if (ic->internal_hash) { size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); - if (!ic->recalc_wq ) { + if (!ic->recalc_wq) { ti->error = "Cannot allocate workqueue"; r = -ENOMEM; goto bad; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 05141eea18d3c..b7dd5a0cd58ba 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -756,8 +756,8 @@ static void core_set_region_sync(struct dm_dirty_log *log, region_t region, log_clear_bit(lc, lc->recovering_bits, region); if (in_sync) { log_set_bit(lc, lc->sync_bits, region); - lc->sync_count++; - } else if (log_test_bit(lc->sync_bits, region)) { + lc->sync_count++; + } else if (log_test_bit(lc->sync_bits, region)) { lc->sync_count--; log_clear_bit(lc, lc->sync_bits, region); } @@ -765,9 +765,9 @@ static void core_set_region_sync(struct dm_dirty_log *log, region_t region, static region_t core_get_sync_count(struct dm_dirty_log *log) { - struct log_c *lc = (struct log_c *) log->context; + struct log_c *lc = (struct log_c *) log->context; - return lc->sync_count; + return lc->sync_count; } #define DMEMIT_SYNC \ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 7fbce214e00f5..bf833ca880bc1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -362,8 +362,8 @@ static struct { const int mode; const char *param; } _raid456_journal_mode[] = { - { R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" }, - { R5C_JOURNAL_MODE_WRITE_BACK , "writeback" } + { R5C_JOURNAL_MODE_WRITE_THROUGH, "writethrough" }, + { R5C_JOURNAL_MODE_WRITE_BACK, "writeback" } }; /* Return MD raid4/5/6 journal mode for dm @journal_mode one */ @@ -1114,7 +1114,7 @@ too_many: * [stripe_cache ] Stripe cache size for higher RAIDs * [region_size ] Defines granularity of bitmap * [journal_dev ] raid4/5/6 journaling deviice - * (i.e. write hole closing log) + * (i.e. write hole closing log) * * RAID10-only options: * [raid10_copies <# copies>] Number of copies. (Default: 2) @@ -3999,7 +3999,7 @@ static int raid_preresume(struct dm_target *ti) } /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */ - if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) || (rs->requested_bitmap_chunk_sectors && mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index c38e63706d911..2327645fc0648 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -902,7 +902,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; kfree(ms); - return NULL; + return NULL; } ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e0367a672eabf..aabb2435070b8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -72,7 +72,7 @@ static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) n = get_child(n, CHILDREN_PER_NODE - 1); if (n >= t->counts[l]) - return (sector_t) - 1; + return (sector_t) -1; return get_node(t, l, n)[KEYS_PER_NODE - 1]; } @@ -1533,7 +1533,7 @@ static bool dm_table_any_dev_attr(struct dm_table *t, if (ti->type->iterate_devices && ti->type->iterate_devices(ti, func, data)) return true; - } + } return false; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 601f9e4e6234f..f24d89af7c5f0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1179,9 +1179,9 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO); discard_parent->bi_end_io = passdown_endio; discard_parent->bi_private = m; - if (m->maybe_shared) - passdown_double_checking_shared_status(m, discard_parent); - else { + if (m->maybe_shared) + passdown_double_checking_shared_status(m, discard_parent); + else { struct discard_op op; begin_discard(&op, tc, discard_parent); diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index c6ff43a8f0b25..a705e24d3e2b6 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) req.notify.context = &endio; /* writing via async dm-io (implied by notify.fn above) won't return an error */ - (void) dm_io(&req, 1, ®ion, NULL); + (void) dm_io(&req, 1, ®ion, NULL); i = j; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 1cc783d7030d8..18d949d63543b 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -726,7 +726,7 @@ static int shadow_child(struct dm_btree_info *info, struct dm_btree_value_type * * nodes, so saves metadata space. */ static int split_two_into_three(struct shadow_spine *s, unsigned int parent_index, - struct dm_btree_value_type *vt, uint64_t key) + struct dm_btree_value_type *vt, uint64_t key) { int r; unsigned int middle_index; @@ -781,7 +781,7 @@ static int split_two_into_three(struct shadow_spine *s, unsigned int parent_inde if (shadow_current(s) != right) unlock_block(s->info, right); - return r; + return r; } @@ -1216,7 +1216,7 @@ int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, static bool need_insert(struct btree_node *node, uint64_t *keys, unsigned int level, unsigned int index) { - return ((index >= le32_to_cpu(node->header.nr_entries)) || + return ((index >= le32_to_cpu(node->header.nr_entries)) || (le64_to_cpu(node->keys[index]) != keys[level])); } diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index af800efed9f3c..4833a3998c1d9 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -390,7 +390,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, } int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, - dm_block_t begin, dm_block_t end, dm_block_t *b) + dm_block_t begin, dm_block_t end, dm_block_t *b) { int r; uint32_t count; diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 706ceb85d6800..63d9a72e3265c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -120,7 +120,7 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, - dm_block_t begin, dm_block_t end, dm_block_t *result); + dm_block_t begin, dm_block_t end, dm_block_t *result); /* * The next three functions return (via nr_allocations) the net number of -- GitLab From 92b3c2437df8fe55a5c7816d9521b1fb7d0718b0 Mon Sep 17 00:00:00 2001 From: Hongyu Jin Date: Wed, 24 Jan 2024 13:35:53 +0800 Subject: [PATCH 1114/1418] dm io: Support IO priority [ Upstream commit 6e5f0f6383b4896c7e9b943d84b136149d0f45e9 ] Some IO will dispatch from kworker with different io_context settings than the submitting task, we may need to specify a priority to avoid losing priority. Add IO priority parameter to dm_io() and update all callers. Co-developed-by: Yibin Ding Signed-off-by: Yibin Ding Signed-off-by: Hongyu Jin Reviewed-by: Eric Biggers Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Stable-dep-of: b4d78cfeb304 ("dm-integrity: align the outgoing bio in integrity_recheck") Signed-off-by: Sasha Levin --- drivers/md/dm-bufio.c | 6 +++--- drivers/md/dm-integrity.c | 12 ++++++------ drivers/md/dm-io.c | 23 +++++++++++++---------- drivers/md/dm-kcopyd.c | 4 ++-- drivers/md/dm-log.c | 4 ++-- drivers/md/dm-raid1.c | 6 +++--- drivers/md/dm-snap-persistent.c | 4 ++-- drivers/md/dm-verity-target.c | 2 +- drivers/md/dm-writecache.c | 8 ++++---- include/linux/dm-io.h | 3 ++- 10 files changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 100a6a236d92a..ec662f97ba828 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -614,7 +614,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, io_req.mem.ptr.vma = (char *)b->data + offset; } - r = dm_io(&io_req, 1, ®ion, NULL); + r = dm_io(&io_req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) b->end_io(b, errno_to_blk_status(r)); } @@ -1375,7 +1375,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) BUG_ON(dm_bufio_in_request()); - return dm_io(&io_req, 1, &io_reg, NULL); + return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); @@ -1398,7 +1398,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c BUG_ON(dm_bufio_in_request()); - return dm_io(&io_req, 1, &io_reg, NULL); + return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 94382e43ea506..aff818eb31fbb 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -579,7 +579,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) } } - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) return r; @@ -1089,7 +1089,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, io_loc.sector = ic->start + SB_SECTORS + sector; io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? "reading journal" : "writing journal", r); @@ -1205,7 +1205,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, u io_loc.sector = target; io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { WARN_ONCE(1, "asynchronous dm_io failed: %d", r); fn(-1UL, data); @@ -1532,7 +1532,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat fr.io_reg.count = 0, fr.ic = ic; init_completion(&fr.comp); - r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT); BUG_ON(r); } @@ -1737,7 +1737,7 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks io_loc.sector = sector; io_loc.count = ic->sectors_per_block; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dio->bi_status = errno_to_blk_status(r); goto free_ret; @@ -2774,7 +2774,7 @@ next_chunk: io_loc.sector = get_data_sector(ic, area, offset); io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dm_integrity_io_error(ic, "reading data", r); goto err; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e488b05e35fa3..ec97658387c39 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -295,7 +295,7 @@ static void km_dp_init(struct dpages *dp, void *data) *---------------------------------------------------------------*/ static void do_region(const blk_opf_t opf, unsigned int region, struct dm_io_region *where, struct dpages *dp, - struct io *io) + struct io *io, unsigned short ioprio) { struct bio *bio; struct page *page; @@ -344,6 +344,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_end_io = endio; + bio->bi_ioprio = ioprio; store_io_and_region_in_bio(bio, io, region); if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { @@ -371,7 +372,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, static void dispatch_io(blk_opf_t opf, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, - struct io *io, int sync) + struct io *io, int sync, unsigned short ioprio) { int i; struct dpages old_pages = *dp; @@ -388,7 +389,7 @@ static void dispatch_io(blk_opf_t opf, unsigned int num_regions, for (i = 0; i < num_regions; i++) { *dp = old_pages; if (where[i].count || (opf & REQ_PREFLUSH)) - do_region(opf, i, where + i, dp, io); + do_region(opf, i, where + i, dp, io, ioprio); } /* @@ -413,7 +414,7 @@ static void sync_io_complete(unsigned long error, void *context) static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, - unsigned long *error_bits) + unsigned long *error_bits, unsigned short ioprio) { struct io *io; struct sync_io sio; @@ -435,7 +436,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(opf, num_regions, where, dp, io, 1); + dispatch_io(opf, num_regions, where, dp, io, 1, ioprio); wait_for_completion_io(&sio.wait); @@ -447,7 +448,8 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, static int async_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, - struct dpages *dp, io_notify_fn fn, void *context) + struct dpages *dp, io_notify_fn fn, void *context, + unsigned short ioprio) { struct io *io; @@ -467,7 +469,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(opf, num_regions, where, dp, io, 0); + dispatch_io(opf, num_regions, where, dp, io, 0, ioprio); return 0; } @@ -509,7 +511,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, } int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *where, unsigned long *sync_error_bits) + struct dm_io_region *where, unsigned long *sync_error_bits, + unsigned short ioprio) { int r; struct dpages dp; @@ -520,11 +523,11 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_opf, &dp, sync_error_bits); + io_req->bi_opf, &dp, sync_error_bits, ioprio); return async_io(io_req->client, num_regions, where, io_req->bi_opf, &dp, io_req->notify.fn, - io_req->notify.context); + io_req->notify.context, ioprio); } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 0ef78e56aa88c..fda51bd140ed3 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -572,9 +572,9 @@ static int run_io_job(struct kcopyd_job *job) io_job_start(job->kc->throttle); if (job->op == REQ_OP_READ) - r = dm_io(&io_req, 1, &job->source, NULL); + r = dm_io(&io_req, 1, &job->source, NULL, IOPRIO_DEFAULT); else - r = dm_io(&io_req, job->num_dests, job->dests, NULL); + r = dm_io(&io_req, job->num_dests, job->dests, NULL, IOPRIO_DEFAULT); return r; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index b7dd5a0cd58ba..da77878cb2c02 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -295,7 +295,7 @@ static int rw_header(struct log_c *lc, enum req_op op) { lc->io_req.bi_opf = op; - return dm_io(&lc->io_req, 1, &lc->header_location, NULL); + return dm_io(&lc->io_req, 1, &lc->header_location, NULL, IOPRIO_DEFAULT); } static int flush_header(struct log_c *lc) @@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc) lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - return dm_io(&lc->io_req, 1, &null_location, NULL); + return dm_io(&lc->io_req, 1, &null_location, NULL, IOPRIO_DEFAULT); } static int read_header(struct log_c *log) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 2327645fc0648..1004199ae77ac 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -273,7 +273,7 @@ static int mirror_flush(struct dm_target *ti) } error_bits = -1; - dm_io(&io_req, ms->nr_mirrors, io, &error_bits); + dm_io(&io_req, ms->nr_mirrors, io, &error_bits, IOPRIO_DEFAULT); if (unlikely(error_bits != 0)) { for (i = 0; i < ms->nr_mirrors; i++) if (test_bit(i, &error_bits)) @@ -543,7 +543,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio) map_region(&io, m, bio); bio_set_m(bio, m); - BUG_ON(dm_io(&io_req, 1, &io, NULL)); + BUG_ON(dm_io(&io_req, 1, &io, NULL, IOPRIO_DEFAULT)); } static inline int region_in_sync(struct mirror_set *ms, region_t region, @@ -670,7 +670,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) */ bio_set_m(bio, get_default_mirror(ms)); - BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); + BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL, IOPRIO_DEFAULT)); } static void do_writes(struct mirror_set *ms, struct bio_list *writes) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 80b95746a43e0..eee1cd3aa3fcf 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -220,7 +220,7 @@ static void do_metadata(struct work_struct *work) { struct mdata_req *req = container_of(work, struct mdata_req, work); - req->result = dm_io(req->io_req, 1, req->where, NULL); + req->result = dm_io(req->io_req, 1, req->where, NULL, IOPRIO_DEFAULT); } /* @@ -244,7 +244,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, struct mdata_req req; if (!metadata) - return dm_io(&io_req, 1, &where, NULL); + return dm_io(&io_req, 1, &where, NULL, IOPRIO_DEFAULT); req.where = &where; req.io_req = &io_req; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b48e1b59e6da4..6a707b41dc865 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -503,7 +503,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, io_loc.bdev = v->data_dev->bdev; io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) goto free_ret; diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index a705e24d3e2b6..20fc84b24fc75 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) req.notify.context = &endio; /* writing via async dm-io (implied by notify.fn above) won't return an error */ - (void) dm_io(&req, 1, ®ion, NULL); + (void) dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); i = j; } @@ -568,7 +568,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) req.notify.fn = NULL; req.notify.context = NULL; - r = dm_io(&req, 1, ®ion, NULL); + r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) writecache_error(wc, r, "error writing superblock"); } @@ -596,7 +596,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) req.client = wc->dm_io; req.notify.fn = NULL; - r = dm_io(&req, 1, ®ion, NULL); + r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) writecache_error(wc, r, "error flushing metadata: %d", r); } @@ -984,7 +984,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors req.client = wc->dm_io; req.notify.fn = NULL; - return dm_io(&req, 1, ®ion, NULL); + return dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); } static void writecache_resume(struct dm_target *ti) diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 92e7abfe04f92..70b3737052dd2 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -79,7 +79,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ -- GitLab From aa587257e146a6d7450225c02d28d6c1320d173a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 21 Mar 2024 17:48:45 +0100 Subject: [PATCH 1115/1418] dm-integrity: align the outgoing bio in integrity_recheck [ Upstream commit b4d78cfeb30476239cf08f4f40afc095c173d6e3 ] It is possible to set up dm-integrity with smaller sector size than the logical sector size of the underlying device. In this situation, dm-integrity guarantees that the outgoing bios have the same alignment as incoming bios (so, if you create a filesystem with 4k block size, dm-integrity would send 4k-aligned bios to the underlying device). This guarantee was broken when integrity_recheck was implemented. integrity_recheck sends bio that is aligned to ic->sectors_per_block. So if we set up integrity with 512-byte sector size on a device with logical block size 4k, we would be sending unaligned bio. This triggered a bug in one of our internal tests. This commit fixes it by determining the actual alignment of the incoming bio and then makes sure that the outgoing bio in integrity_recheck has the same alignment. Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index aff818eb31fbb..9c9e2b50c63c3 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1709,7 +1709,6 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks struct bio_vec bv; sector_t sector, logical_sector, area, offset; struct page *page; - void *buffer; get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, @@ -1718,13 +1717,14 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks logical_sector = dio->range.logical_sector; page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); - buffer = page_to_virt(page); __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { unsigned pos = 0; do { + sector_t alignment; char *mem; + char *buffer = page_to_virt(page); int r; struct dm_io_request io_req; struct dm_io_region io_loc; @@ -1737,6 +1737,14 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks io_loc.sector = sector; io_loc.count = ic->sectors_per_block; + /* Align the bio to logical block size */ + alignment = dio->range.logical_sector | bio_sectors(bio) | (PAGE_SIZE >> SECTOR_SHIFT); + alignment &= -alignment; + io_loc.sector = round_down(io_loc.sector, alignment); + io_loc.count += sector - io_loc.sector; + buffer += (sector - io_loc.sector) << SECTOR_SHIFT; + io_loc.count = round_up(io_loc.count, alignment); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dio->bi_status = errno_to_blk_status(r); -- GitLab From f07ffd18d787b0227f95d94f678232a30c0e27c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Mar 2024 16:26:16 +0100 Subject: [PATCH 1116/1418] x86/efistub: Clear decompressor BSS in native EFI entrypoint [ Upstream commit b3810c5a2cc4a6665f7a65bed5393c75ce3f3aa2 ] The EFI stub on x86 no longer invokes the decompressor as a subsequent boot stage, but calls into the decompression code directly while running in the context of the EFI boot services. This means that when using the native EFI entrypoint (as opposed to the EFI handover protocol, which clears BSS explicitly), the firmware PE image loader is being relied upon to ensure that BSS is zeroed before the EFI stub is entered from the firmware. As Radek's report proves, this is a bad idea. Not all loaders do this correctly, which means some global variables that should be statically initialized to 0x0 may have junk in them. So clear BSS explicitly when entering via efi_pe_entry(). Note that zeroing BSS from C code is not generally safe, but in this case, the following assignment and dereference of a global pointer variable ensures that the memset() cannot be deferred or reordered. Cc: # v6.1+ Reported-by: Radek Podgorny Closes: https://lore.kernel.org/all/a99a831a-8ad5-4cb0-bff9-be637311f771@podgorny.cz Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/x86-stub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 784e1b2ae5ccd..aa07051459f52 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -21,6 +21,8 @@ #include "efistub.h" #include "x86-stub.h" +extern char _bss[], _ebss[]; + const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; static efi_loaded_image_t *image = NULL; @@ -432,6 +434,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_status_t status; char *cmdline_ptr; + memset(_bss, 0, _ebss - _bss); + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ @@ -950,8 +954,6 @@ fail: void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params) { - extern char _bss[], _ebss[]; - memset(_bss, 0, _ebss - _bss); efi_stub_entry(handle, sys_table_arg, boot_params); } -- GitLab From 4b0c54792394ab3d8b1df29a069844fc277b8f14 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 22 Mar 2024 17:01:45 +0100 Subject: [PATCH 1117/1418] x86/efistub: Don't clear BSS twice in mixed mode [ Upstream commit df7ecce842b846a04d087ba85fdb79a90e26a1b0 ] Clearing BSS should only be done once, at the very beginning. efi_pe_entry() is the entrypoint from the firmware, which may not clear BSS and so it is done explicitly. However, efi_pe_entry() is also used as an entrypoint by the mixed mode startup code, in which case BSS will already have been cleared, and doing it again at this point will corrupt global variables holding the firmware's GDT/IDT and segment selectors. So make the memset() conditional on whether the EFI stub is running in native mode. Fixes: b3810c5a2cc4a666 ("x86/efistub: Clear decompressor BSS in native EFI entrypoint") Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/x86-stub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index aa07051459f52..dc50dda40239e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -434,7 +434,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_status_t status; char *cmdline_ptr; - memset(_bss, 0, _ebss - _bss); + if (efi_is_native()) + memset(_bss, 0, _ebss - _bss); efi_system_table = sys_table_arg; -- GitLab From 9b2e8276bfb6bdb26becbbcc509b3a7b93600384 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Jul 2023 21:56:49 +0200 Subject: [PATCH 1118/1418] remoteproc: stm32: fix incorrect optional pointers commit fb2bdd32b231b70e6a3f1054528692f604db25d8 upstream. Compile-testing without CONFIG_OF shows that the of_match_ptr() macro was used incorrectly here: drivers/remoteproc/stm32_rproc.c:662:34: warning: unused variable 'stm32_rproc_match' [-Wunused-const-variable] As in almost every driver, the solution is simply to remove the use of this macro. The same thing happened with the deprecated SIMPLE_DEV_PM_OPS(), but the corresponding warning was already shut up with __maybe_unused annotations, so fix those as well by using the correct DEFINE_SIMPLE_DEV_PM_OPS() macros and removing the extraneous __maybe_unused modifiers. For completeness, also add a pm_ptr() to let the PM ops be eliminated completely when CONFIG_PM is turned off. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202307242300.ia82qBTp-lkp@intel.com Fixes: 03bd158e1535 ("remoteproc: stm32: use correct format strings on 64-bit") Fixes: 410119ee29b6 ("remoteproc: stm32: wakeup the system by wdg irq") Fixes: 13140de09cc2 ("remoteproc: stm32: add an ST stm32_rproc driver") Signed-off-by: Arnd Bergmann Acked-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20230724195704.2432382-1-arnd@kernel.org Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/stm32_rproc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 385e931603ad3..74da0393172c5 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -889,7 +889,7 @@ static int stm32_rproc_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused stm32_rproc_suspend(struct device *dev) +static int stm32_rproc_suspend(struct device *dev) { struct rproc *rproc = dev_get_drvdata(dev); struct stm32_rproc *ddata = rproc->priv; @@ -900,7 +900,7 @@ static int __maybe_unused stm32_rproc_suspend(struct device *dev) return 0; } -static int __maybe_unused stm32_rproc_resume(struct device *dev) +static int stm32_rproc_resume(struct device *dev) { struct rproc *rproc = dev_get_drvdata(dev); struct stm32_rproc *ddata = rproc->priv; @@ -911,16 +911,16 @@ static int __maybe_unused stm32_rproc_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops, - stm32_rproc_suspend, stm32_rproc_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops, + stm32_rproc_suspend, stm32_rproc_resume); static struct platform_driver stm32_rproc_driver = { .probe = stm32_rproc_probe, .remove = stm32_rproc_remove, .driver = { .name = "stm32-rproc", - .pm = &stm32_rproc_pm_ops, - .of_match_table = of_match_ptr(stm32_rproc_match), + .pm = pm_ptr(&stm32_rproc_pm_ops), + .of_match_table = stm32_rproc_match, }, }; module_platform_driver(stm32_rproc_driver); -- GitLab From e5cd595e23c1a075359a337c0e5c3a4f2dc28dd1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 24 Mar 2024 14:37:56 -0400 Subject: [PATCH 1119/1418] Linux 6.1.83 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Shuah Khan Tested-by: kernelci.org bot Tested-by: Mateusz Jończyk Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5345f3ebed0d..38657b3dda2cd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 82 +SUBLEVEL = 83 EXTRAVERSION = NAME = Curry Ramen -- GitLab From f1ee75aa66c79976d73179f22b311a17611888db Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 24 Jan 2023 10:33:18 -0600 Subject: [PATCH 1120/1418] x86/cpu: Support AMD Automatic IBRS commit e7862eda309ecfccc36bb5558d937ed3ace07f3f upstream. The AMD Zen4 core supports a new feature called Automatic IBRS. It is a "set-and-forget" feature that means that, like Intel's Enhanced IBRS, h/w manages its IBRS mitigation resources automatically across CPL transitions. The feature is advertised by CPUID_Fn80000021_EAX bit 8 and is enabled by setting MSR C000_0080 (EFER) bit 21. Enable Automatic IBRS by default if the CPU feature is present. It typically provides greater performance over the incumbent generic retpolines mitigation. Reuse the SPECTRE_V2_EIBRS spectre_v2_mitigation enum. AMD Automatic IBRS and Intel Enhanced IBRS have similar enablement. Add NO_EIBRS_PBRSB to cpu_vuln_whitelist, since AMD Automatic IBRS isn't affected by PBRSB-eIBRS. The kernel command line option spectre_v2=eibrs is used to select AMD Automatic IBRS, if available. Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Acked-by: Sean Christopherson Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20230124163319.2277355-8-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 6 +++--- .../admin-guide/kernel-parameters.txt | 6 +++--- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 20 +++++++++++-------- arch/x86/kernel/cpu/common.c | 19 ++++++++++-------- 6 files changed, 32 insertions(+), 22 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index a39bbfe9526b6..4d186f599d90f 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -621,9 +621,9 @@ kernel command line. retpoline,generic Retpolines retpoline,lfence LFENCE; indirect branch retpoline,amd alias for retpoline,lfence - eibrs enhanced IBRS - eibrs,retpoline enhanced IBRS + Retpolines - eibrs,lfence enhanced IBRS + LFENCE + eibrs Enhanced/Auto IBRS + eibrs,retpoline Enhanced/Auto IBRS + Retpolines + eibrs,lfence Enhanced/Auto IBRS + LFENCE ibrs use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2dfe75104e7de..f7dfdd3d021ea 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5765,9 +5765,9 @@ retpoline,generic - Retpolines retpoline,lfence - LFENCE; indirect branch retpoline,amd - alias for retpoline,lfence - eibrs - enhanced IBRS - eibrs,retpoline - enhanced IBRS + Retpolines - eibrs,lfence - enhanced IBRS + LFENCE + eibrs - Enhanced/Auto IBRS + eibrs,retpoline - Enhanced/Auto IBRS + Retpolines + eibrs,lfence - Enhanced/Auto IBRS + LFENCE ibrs - use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b97a70aa4de90..9a157942ae3dd 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -427,6 +427,7 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 005e41dc7ee5a..33a19ef23644d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -30,6 +30,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -38,6 +39,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* Intel MSRs. Some also available on other CPUs */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c68789fdc123b..e7074a1b6ba78 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1396,9 +1396,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", - [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", - [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines", [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; @@ -1467,7 +1467,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -1652,8 +1652,12 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - update_spec_ctrl(x86_spec_ctrl_base); + if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) { + msr_set_bit(MSR_EFER, _EFER_AUTOIBRS); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + update_spec_ctrl(x86_spec_ctrl_base); + } } switch (mode) { @@ -1737,8 +1741,8 @@ static void __init spectre_v2_select_mitigation(void) /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around - * firmware calls only when IBRS / Enhanced IBRS aren't otherwise - * enabled. + * firmware calls only when IBRS / Enhanced / Automatic IBRS aren't + * otherwise enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 758938c94b41e..ca243d7ba0ea5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1212,8 +1212,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* Zhaoxin Family 7 */ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), @@ -1362,8 +1362,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - if (ia32_cap & ARCH_CAP_IBRS_ALL) + /* + * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature + * flag and protect from vendor-specific bugs via the whitelist. + */ + if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { @@ -1425,11 +1433,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_RETBLEED); } - if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && - !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) - setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); - if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) setup_force_cpu_bug(X86_BUG_SMT_RSB); -- GitLab From 3769db86adec4dd3d88446792da1f9ca5d964e34 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 9 Aug 2022 17:32:02 +0200 Subject: [PATCH 1121/1418] x86/bugs: Use sysfs_emit() commit 1d30800c0c0ae1d086ffad2bdf0ba4403370f132 upstream. Those mitigations are very talkative; use the printing helper which pays attention to the buffer size. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220809153419.10182-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 103 ++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7074a1b6ba78..750fb4fc2ac6a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2572,74 +2572,74 @@ static const char * const l1tf_vmx_states[] = { static ssize_t l1tf_show_state(char *buf) { if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && sched_smt_active())) { - return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation]); + return sysfs_emit(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); } - return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t itlb_multihit_show_state(char *buf) { if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || !boot_cpu_has(X86_FEATURE_VMX)) - return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX unsupported\n"); else if (!(cr4_read_shadow() & X86_CR4_VMXE)) - return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX disabled\n"); else if (itlb_multihit_kvm_mitigation) - return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + return sysfs_emit(buf, "KVM: Mitigation: Split huge pages\n"); else - return sprintf(buf, "KVM: Vulnerable\n"); + return sysfs_emit(buf, "KVM: Vulnerable\n"); } #else static ssize_t l1tf_show_state(char *buf) { - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); } static ssize_t itlb_multihit_show_state(char *buf) { - return sprintf(buf, "Processor vulnerable\n"); + return sysfs_emit(buf, "Processor vulnerable\n"); } #endif static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - mds_strings[mds_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); } if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : - sched_smt_active() ? "mitigated" : "disabled")); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t tsx_async_abort_show_state(char *buf) { if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || (taa_mitigation == TAA_MITIGATION_OFF)) - return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s\n", taa_strings[taa_mitigation]); if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); } - return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t mmio_stale_data_show_state(char *buf) @@ -2712,47 +2712,46 @@ static char *pbrsb_eibrs_state(void) static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) - return sprintf(buf, "Vulnerable: LFENCE\n"); + return sysfs_emit(buf, "Vulnerable: LFENCE\n"); if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) - return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); if (sched_smt_active() && unprivileged_ebpf_enabled() && spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s%s\n", - spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - pbrsb_eibrs_state(), - spectre_v2_module_string()); + return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), + spectre_v2_module_string()); } static ssize_t srbds_show_state(char *buf) { - return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + return sysfs_emit(buf, "%s\n", srbds_strings[srbds_mitigation]); } static ssize_t retbleed_show_state(char *buf) { if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET || retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sysfs_emit(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); - return sprintf(buf, "%s; SMT %s\n", - retbleed_strings[retbleed_mitigation], - !sched_smt_active() ? "disabled" : - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? - "enabled with STIBP protection" : "vulnerable"); + return sysfs_emit(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); } - return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } static ssize_t gds_show_state(char *buf) @@ -2774,26 +2773,26 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); switch (bug) { case X86_BUG_CPU_MELTDOWN: if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + return sysfs_emit(buf, "Mitigation: PTI\n"); if (hypervisor_is_type(X86_HYPER_XEN_PV)) - return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); + return sysfs_emit(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); break; case X86_BUG_SPECTRE_V1: - return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); + return sysfs_emit(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: - return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + return sysfs_emit(buf, "%s\n", ssb_strings[ssb_mode]); case X86_BUG_L1TF: if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) @@ -2832,7 +2831,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr break; } - return sprintf(buf, "Vulnerable\n"); + return sysfs_emit(buf, "Vulnerable\n"); } ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -- GitLab From 8d70aa08326c559638353dd68b1d9bb35e4fd867 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Nov 2022 20:58:39 +0800 Subject: [PATCH 1122/1418] KVM: x86: Update KVM-only leaf handling to allow for 100% KVM-only leafs commit 047c7229906152fb85c23dc18fd25a00cd7cb4de upstream. Rename kvm_cpu_cap_init_scattered() to kvm_cpu_cap_init_kvm_defined() in anticipation of adding KVM-only CPUID leafs that aren't recognized by the kernel and thus not scattered, i.e. for leafs that are 100% KVM-defined. Adjust/add comments to kvm_only_cpuid_leafs and KVM_X86_FEATURE to document how to create new kvm_only_cpuid_leafs entries for scattered features as well as features that are entirely unknown to the kernel. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20221125125845.1182922-3-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 ++++---- arch/x86/kvm/reverse_cpuid.h | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c3ef1fc602bf9..1180938917ce3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -535,9 +535,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) } static __always_inline -void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -547,7 +547,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ BUILD_BUG_ON(leaf >= NCAPINTS); kvm_cpu_caps[leaf] &= mask; @@ -656,7 +656,7 @@ void kvm_set_cpu_caps(void) F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); - kvm_cpu_cap_init_scattered(CPUID_12_EAX, + kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, SF(SGX1) | SF(SGX2) ); diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 7eeade35a425b..9b73fb8b87d99 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,9 +7,9 @@ #include /* - * Hardware-defined CPUID leafs that are scattered in the kernel, but need to - * be directly used by KVM. Note, these word values conflict with the kernel's - * "bug" caps, but KVM doesn't use those. + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, @@ -18,6 +18,18 @@ enum kvm_only_cpuid_leafs { NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +/* + * Define a KVM-only feature flag. + * + * For features that are scattered by cpufeatures.h, __feature_translate() also + * needs to be updated to translate the kernel-defined feature into the + * KVM-defined feature. + * + * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h, + * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so + * that X86_FEATURE_* can be used in KVM. No __feature_translate() handling is + * needed in this case. + */ #define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ -- GitLab From b6aa21725fbe6ab9f0f1c08d97b77b31d71c5066 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 23 Oct 2023 17:16:35 -0700 Subject: [PATCH 1123/1418] KVM: x86: Advertise CPUID.(EAX=7,ECX=2):EDX[5:0] to userspace commit eefe5e6682099445f77f2d97d4c525f9ac9d9b07 upstream. The low five bits {INTEL_PSFD, IPRED_CTRL, RRSBA_CTRL, DDPD_U, BHI_CTRL} advertise the availability of specific bits in IA32_SPEC_CTRL. Since KVM dynamically determines the legal IA32_SPEC_CTRL bits for the underlying hardware, the hard work has already been done. Just let userspace know that a guest can use these IA32_SPEC_CTRL bits. The sixth bit (MCDT_NO) states that the processor does not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior. This is an inherent property of the physical processor that is inherited by the virtual CPU. Pass that information on to userspace. Signed-off-by: Jim Mattson Reviewed-by: Chao Gao Link: https://lore.kernel.org/r/20231024001636.890236-1-jmattson@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++--- arch/x86/kvm/reverse_cpuid.h | 12 ++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1180938917ce3..62a44455c51d0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -652,6 +652,11 @@ void kvm_set_cpu_caps(void) F(AVX_VNNI) | F(AVX512_BF16) ); + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, + F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | + F(BHI_CTRL) | F(MCDT_NO) + ); + kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); @@ -902,13 +907,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* function 7 has additional index. */ case 7: - entry->eax = min(entry->eax, 1u); + max_idx = entry->eax = min(entry->eax, 2u); cpuid_entry_override(entry, CPUID_7_0_EBX); cpuid_entry_override(entry, CPUID_7_ECX); cpuid_entry_override(entry, CPUID_7_EDX); - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ - if (entry->eax == 1) { + /* KVM only supports up to 0x7.2, capped above via min(). */ + if (max_idx >= 1) { entry = do_host_cpuid(array, function, 1); if (!entry) goto out; @@ -918,6 +923,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = 0; entry->edx = 0; } + if (max_idx >= 2) { + entry = do_host_cpuid(array, function, 2); + if (!entry) + goto out; + + cpuid_entry_override(entry, CPUID_7_2_EDX); + entry->ecx = 0; + entry->ebx = 0; + entry->eax = 0; + } break; case 0xa: { /* Architectural Performance Monitoring */ union cpuid10_eax eax; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 9b73fb8b87d99..d50b653cb8d09 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -13,6 +13,7 @@ */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_2_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -36,6 +37,14 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ +#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) +#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) +#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) +#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) + struct cpuid_reg { u32 function; u32 index; @@ -61,6 +70,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, }; /* @@ -91,6 +101,8 @@ static __always_inline u32 __feature_translate(int x86_feature) return KVM_X86_FEATURE_SGX1; else if (x86_feature == X86_FEATURE_SGX2) return KVM_X86_FEATURE_SGX2; + else if (x86_feature == X86_FEATURE_RRSBA_CTRL) + return KVM_X86_FEATURE_RRSBA_CTRL; return x86_feature; } -- GitLab From 981cf0cab48a11fd2011895213f44e2460da03bb Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 23 Oct 2023 17:16:36 -0700 Subject: [PATCH 1124/1418] KVM: x86: Use a switch statement and macros in __feature_translate() commit 80c883db87d9ffe2d685e91ba07a087b1c246c78 upstream. Use a switch statement with macro-generated case statements to handle translating feature flags in order to reduce the probability of runtime errors due to copy+paste goofs, to make compile-time errors easier to debug, and to make the code more readable. E.g. the compiler won't directly generate an error for duplicate if statements if (x86_feature == X86_FEATURE_SGX1) return KVM_X86_FEATURE_SGX1; else if (x86_feature == X86_FEATURE_SGX2) return KVM_X86_FEATURE_SGX1; and so instead reverse_cpuid_check() will fail due to the untranslated entry pointing at a Linux-defined leaf, which provides practically no hint as to what is broken arch/x86/kvm/reverse_cpuid.h:108:2: error: call to __compiletime_assert_450 declared with 'error' attribute: BUILD_BUG_ON failed: x86_leaf == CPUID_LNX_4 BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); ^ whereas duplicate case statements very explicitly point at the offending code: arch/x86/kvm/reverse_cpuid.h:125:2: error: duplicate case value '361' KVM_X86_TRANSLATE_FEATURE(SGX2); ^ arch/x86/kvm/reverse_cpuid.h:124:2: error: duplicate case value '360' KVM_X86_TRANSLATE_FEATURE(SGX1); ^ And without macros, the opposite type of copy+paste goof doesn't generate any error at compile-time, e.g. this yields no complaints: case X86_FEATURE_SGX1: return KVM_X86_FEATURE_SGX1; case X86_FEATURE_SGX2: return KVM_X86_FEATURE_SGX1; Note, __feature_translate() is forcibly inlined and the feature is known at compile-time, so the code generation between an if-elif sequence and a switch statement should be identical. Signed-off-by: Jim Mattson Link: https://lore.kernel.org/r/20231024001636.890236-2-jmattson@google.com [sean: use a macro, rewrite changelog] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/reverse_cpuid.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index d50b653cb8d09..7c8e2b20a13b0 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -97,14 +97,16 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) */ static __always_inline u32 __feature_translate(int x86_feature) { - if (x86_feature == X86_FEATURE_SGX1) - return KVM_X86_FEATURE_SGX1; - else if (x86_feature == X86_FEATURE_SGX2) - return KVM_X86_FEATURE_SGX2; - else if (x86_feature == X86_FEATURE_RRSBA_CTRL) - return KVM_X86_FEATURE_RRSBA_CTRL; - - return x86_feature; +#define KVM_X86_TRANSLATE_FEATURE(f) \ + case X86_FEATURE_##f: return KVM_X86_FEATURE_##f + + switch (x86_feature) { + KVM_X86_TRANSLATE_FEATURE(SGX1); + KVM_X86_TRANSLATE_FEATURE(SGX2); + KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + default: + return x86_feature; + } } static __always_inline u32 __feature_leaf(int x86_feature) -- GitLab From a50fd9871257f2cd78e367ad0c1140cb86c4366e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:40 +0100 Subject: [PATCH 1125/1418] timers: Update kernel-doc for various functions [ Upstream commit 14f043f1340bf30bc60af127bff39f55889fef26 ] The kernel-doc of timer related functions is partially uncomprehensible word salad. Rewrite it to make it useful. Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20221123201624.828703870@linutronix.de Stable-dep-of: 0f7352557a35 ("wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach") Signed-off-by: Sasha Levin --- kernel/time/timer.c | 148 +++++++++++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 58 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 717fcb9fb14aa..ab9688a2ae190 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1121,14 +1121,16 @@ out_unlock: } /** - * mod_timer_pending - modify a pending timer's timeout - * @timer: the pending timer to be modified - * @expires: new timeout in jiffies + * mod_timer_pending - Modify a pending timer's timeout + * @timer: The pending timer to be modified + * @expires: New absolute timeout in jiffies * - * mod_timer_pending() is the same for pending timers as mod_timer(), - * but will not re-activate and modify already deleted timers. + * mod_timer_pending() is the same for pending timers as mod_timer(), but + * will not activate inactive timers. * - * It is useful for unserialized use of timers. + * Return: + * * %0 - The timer was inactive and not modified + * * %1 - The timer was active and requeued to expire at @expires */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { @@ -1137,24 +1139,27 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(mod_timer_pending); /** - * mod_timer - modify a timer's timeout - * @timer: the timer to be modified - * @expires: new timeout in jiffies - * - * mod_timer() is a more efficient way to update the expire field of an - * active timer (if the timer is inactive it will be activated) + * mod_timer - Modify a timer's timeout + * @timer: The timer to be modified + * @expires: New absolute timeout in jiffies * * mod_timer(timer, expires) is equivalent to: * * del_timer(timer); timer->expires = expires; add_timer(timer); * + * mod_timer() is more efficient than the above open coded sequence. In + * case that the timer is inactive, the del_timer() part is a NOP. The + * timer is in any case activated with the new expiry time @expires. + * * Note that if there are multiple unserialized concurrent users of the * same timer, then mod_timer() is the only safe way to modify the timeout, * since add_timer() cannot modify an already running timer. * - * The function returns whether it has modified a pending timer or not. - * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an - * active timer returns 1.) + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires did + * not change the effective expiry time */ int mod_timer(struct timer_list *timer, unsigned long expires) { @@ -1165,11 +1170,18 @@ EXPORT_SYMBOL(mod_timer); /** * timer_reduce - Modify a timer's timeout if it would reduce the timeout * @timer: The timer to be modified - * @expires: New timeout in jiffies + * @expires: New absolute timeout in jiffies * * timer_reduce() is very similar to mod_timer(), except that it will only - * modify a running timer if that would reduce the expiration time (it will - * start a timer that isn't running). + * modify an enqueued timer if that would reduce the expiration time. If + * @timer is not enqueued it starts the timer. + * + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires + * did not change the effective expiry time such that the + * timer would expire earlier than already scheduled */ int timer_reduce(struct timer_list *timer, unsigned long expires) { @@ -1178,18 +1190,21 @@ int timer_reduce(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(timer_reduce); /** - * add_timer - start a timer - * @timer: the timer to be added + * add_timer - Start a timer + * @timer: The timer to be started * - * The kernel will do a ->function(@timer) callback from the - * timer interrupt at the ->expires point in the future. The - * current time is 'jiffies'. + * Start @timer to expire at @timer->expires in the future. @timer->expires + * is the absolute expiry time measured in 'jiffies'. When the timer expires + * timer->function(timer) will be invoked from soft interrupt context. * - * The timer's ->expires, ->function fields must be set prior calling this - * function. + * The @timer->expires and @timer->function fields must be set prior + * to calling this function. + * + * If @timer->expires is already in the past @timer will be queued to + * expire at the next timer tick. * - * Timers with an ->expires field in the past will be executed in the next - * timer tick. + * This can only operate on an inactive timer. Attempts to invoke this on + * an active timer are rejected with a warning. */ void add_timer(struct timer_list *timer) { @@ -1199,11 +1214,13 @@ void add_timer(struct timer_list *timer) EXPORT_SYMBOL(add_timer); /** - * add_timer_on - start a timer on a particular CPU - * @timer: the timer to be added - * @cpu: the CPU to start it on + * add_timer_on - Start a timer on a particular CPU + * @timer: The timer to be started + * @cpu: The CPU to start it on + * + * Same as add_timer() except that it starts the timer on the given CPU. * - * This is not very scalable on SMP. Double adds are not possible. + * See add_timer() for further details. */ void add_timer_on(struct timer_list *timer, int cpu) { @@ -1238,15 +1255,18 @@ void add_timer_on(struct timer_list *timer, int cpu) EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - deactivate a timer. - * @timer: the timer to be deactivated - * - * del_timer() deactivates a timer - this works on both active and inactive - * timers. - * - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) + * del_timer - Deactivate a timer. + * @timer: The timer to be deactivated + * + * The function only deactivates a pending timer, but contrary to + * del_timer_sync() it does not take into account whether the timer's + * callback function is concurrently executed on a different CPU or not. + * It neither prevents rearming of the timer. If @timer can be rearmed + * concurrently then the return value of this function is meaningless. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ int del_timer(struct timer_list *timer) { @@ -1268,10 +1288,19 @@ EXPORT_SYMBOL(del_timer); /** * try_to_del_timer_sync - Try to deactivate a timer - * @timer: timer to delete + * @timer: Timer to deactivate + * + * This function tries to deactivate a timer. On success the timer is not + * queued and the timer callback function is not running on any CPU. * - * This function tries to deactivate a timer. Upon successful (ret >= 0) - * exit the timer is not queued and the handler is not running on any CPU. + * This function does not guarantee that the timer cannot be rearmed right + * after dropping the base lock. That needs to be prevented by the calling + * code if necessary. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated + * * %-1 - The timer callback function is running on a different CPU */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -1367,23 +1396,19 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) /** - * del_timer_sync - deactivate a timer and wait for the handler to finish. - * @timer: the timer to be deactivated - * - * This function only differs from del_timer() on SMP: besides deactivating - * the timer it also makes sure the handler has finished executing on other - * CPUs. + * del_timer_sync - Deactivate a timer and wait for the handler to finish. + * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts unless the timer is an irqsafe one. The caller must - * not hold locks which would prevent completion of the timer's - * handler. The timer's handler must not call add_timer_on(). Upon exit the - * timer is not queued and the handler is not running on any CPU. + * not hold locks which would prevent completion of the timer's callback + * function. The timer's handler must not call add_timer_on(). Upon exit + * the timer is not queued and the handler is not running on any CPU. * - * Note: For !irqsafe timers, you must not hold locks that are held in - * interrupt context while calling this function. Even if the lock has - * nothing to do with the timer in question. Here's why:: + * For !irqsafe timers, the caller must not hold locks that are held in + * interrupt context. Even if the lock has nothing to do with the timer in + * question. Here's why:: * * CPU0 CPU1 * ---- ---- @@ -1397,10 +1422,17 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * while (base->running_timer == mytimer); * * Now del_timer_sync() will never return and never release somelock. - * The interrupt on the other CPU is waiting to grab somelock but - * it has interrupted the softirq that CPU0 is waiting to finish. + * The interrupt on the other CPU is waiting to grab somelock but it has + * interrupted the softirq that CPU0 is waiting to finish. + * + * This function cannot guarantee that the timer is not rearmed again by + * some concurrent or preempting code, right after it dropped the base + * lock. If there is the possibility of a concurrent rearm then the return + * value of the function is meaningless. * - * The function returns whether it has deactivated a pending timer or not. + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ int del_timer_sync(struct timer_list *timer) { -- GitLab From 2957037c1d94650d19c2bd176373def0c43c6d6d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:42 +0100 Subject: [PATCH 1126/1418] timers: Use del_timer_sync() even on UP [ Upstream commit 168f6b6ffbeec0b9333f3582e4cf637300858db5 ] del_timer_sync() is assumed to be pointless on uniprocessor systems and can be mapped to del_timer() because in theory del_timer() can never be invoked while the timer callback function is executed. This is not entirely true because del_timer() can be invoked from interrupt context and therefore hit in the middle of a running timer callback. Contrary to that del_timer_sync() is not allowed to be invoked from interrupt context unless the affected timer is marked with TIMER_IRQSAFE. del_timer_sync() has proper checks in place to detect such a situation. Give up on the UP optimization and make del_timer_sync() unconditionally available. Co-developed-by: Steven Rostedt Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/all/20220407161745.7d6754b3@gandalf.local.home Link: https://lore.kernel.org/all/20221110064101.429013735@goodmis.org Link: https://lore.kernel.org/r/20221123201624.888306160@linutronix.de Stable-dep-of: 0f7352557a35 ("wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach") Signed-off-by: Sasha Levin --- include/linux/timer.h | 7 +------ kernel/time/timer.c | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 648f00105f588..82bb2e4d3b7c2 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,12 +183,7 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); - -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - extern int del_timer_sync(struct timer_list *timer); -#else -# define del_timer_sync(t) del_timer(t) -#endif +extern int del_timer_sync(struct timer_list *timer); #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ab9688a2ae190..9d09a2a0ad708 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1394,7 +1394,6 @@ static inline void timer_sync_wait_running(struct timer_base *base) { } static inline void del_timer_wait_running(struct timer_list *timer) { } #endif -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) /** * del_timer_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated @@ -1475,7 +1474,6 @@ int del_timer_sync(struct timer_list *timer) return ret; } EXPORT_SYMBOL(del_timer_sync); -#endif static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), -- GitLab From 113d5341ee1299aff7a6252cb19af52160ba22e8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:44 +0100 Subject: [PATCH 1127/1418] timers: Rename del_timer_sync() to timer_delete_sync() [ Upstream commit 9b13df3fb64ee95e2397585404e442afee2c7d4f ] The timer related functions do not have a strict timer_ prefixed namespace which is really annoying. Rename del_timer_sync() to timer_delete_sync() and provide del_timer_sync() as a wrapper. Document that del_timer_sync() is not for new code. Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Steven Rostedt (Google) Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20221123201624.954785441@linutronix.de Stable-dep-of: 0f7352557a35 ("wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach") Signed-off-by: Sasha Levin --- include/linux/timer.h | 15 ++++++++++++++- kernel/time/timer.c | 18 +++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 82bb2e4d3b7c2..6d18f04ad7039 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,7 +183,20 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -extern int del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync(struct timer_list *timer); + +/** + * del_timer_sync - Delete a pending timer and wait for a running callback + * @timer: The timer to be deleted + * + * See timer_delete_sync() for detailed explanation. + * + * Do not use in new code. Use timer_delete_sync() instead. + */ +static inline int del_timer_sync(struct timer_list *timer) +{ + return timer_delete_sync(timer); +} #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9d09a2a0ad708..59469897432bc 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1083,7 +1083,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, - * otherwise del_timer_sync() can't detect that the timer's + * otherwise timer_delete_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. */ @@ -1259,7 +1259,7 @@ EXPORT_SYMBOL_GPL(add_timer_on); * @timer: The timer to be deactivated * * The function only deactivates a pending timer, but contrary to - * del_timer_sync() it does not take into account whether the timer's + * timer_delete_sync() it does not take into account whether the timer's * callback function is concurrently executed on a different CPU or not. * It neither prevents rearming of the timer. If @timer can be rearmed * concurrently then the return value of this function is meaningless. @@ -1395,7 +1395,7 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } #endif /** - * del_timer_sync - Deactivate a timer and wait for the handler to finish. + * timer_delete_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, @@ -1417,10 +1417,10 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * spin_lock_irq(somelock); * * spin_lock(somelock); - * del_timer_sync(mytimer); + * timer_delete_sync(mytimer); * while (base->running_timer == mytimer); * - * Now del_timer_sync() will never return and never release somelock. + * Now timer_delete_sync() will never return and never release somelock. * The interrupt on the other CPU is waiting to grab somelock but it has * interrupted the softirq that CPU0 is waiting to finish. * @@ -1433,7 +1433,7 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ -int del_timer_sync(struct timer_list *timer) +int timer_delete_sync(struct timer_list *timer) { int ret; @@ -1473,7 +1473,7 @@ int del_timer_sync(struct timer_list *timer) return ret; } -EXPORT_SYMBOL(del_timer_sync); +EXPORT_SYMBOL(timer_delete_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), @@ -1495,8 +1495,8 @@ static void call_timer_fn(struct timer_list *timer, #endif /* * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map around the fn() - * call here and in del_timer_sync(). + * timer_delete_sync() by acquiring the lock_map around the fn() + * call here and in timer_delete_sync(). */ lock_map_acquire(&lockdep_map); -- GitLab From 0b812f706fd7090be74812101114a0e165b36744 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sun, 7 Jan 2024 08:25:04 +0100 Subject: [PATCH 1128/1418] wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach [ Upstream commit 0f7352557a35ab7888bc7831411ec8a3cbe20d78 ] This is the candidate patch of CVE-2023-47233 : https://nvd.nist.gov/vuln/detail/CVE-2023-47233 In brcm80211 driver,it starts with the following invoking chain to start init a timeout worker: ->brcmf_usb_probe ->brcmf_usb_probe_cb ->brcmf_attach ->brcmf_bus_started ->brcmf_cfg80211_attach ->wl_init_priv ->brcmf_init_escan ->INIT_WORK(&cfg->escan_timeout_work, brcmf_cfg80211_escan_timeout_worker); If we disconnect the USB by hotplug, it will call brcmf_usb_disconnect to make cleanup. The invoking chain is : brcmf_usb_disconnect ->brcmf_usb_disconnect_cb ->brcmf_detach ->brcmf_cfg80211_detach ->kfree(cfg); While the timeout woker may still be running. This will cause a use-after-free bug on cfg in brcmf_cfg80211_escan_timeout_worker. Fix it by deleting the timer and canceling the worker in brcmf_cfg80211_detach. Fixes: e756af5b30b0 ("brcmfmac: add e-scan support.") Signed-off-by: Zheng Wang Cc: stable@vger.kernel.org [arend.vanspriel@broadcom.com: keep timer delete as is and cancel work just before free] Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240107072504.392713-1-arend.vanspriel@broadcom.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index ad5a8d61d9385..24a3d5a593f15 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -791,8 +791,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, scan_request = cfg->scan_request; cfg->scan_request = NULL; - if (timer_pending(&cfg->escan_timeout)) - del_timer_sync(&cfg->escan_timeout); + timer_delete_sync(&cfg->escan_timeout); if (fw_abort) { /* Do a scan abort to stop the driver's scan engine */ @@ -7805,6 +7804,7 @@ void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg) brcmf_btcoex_detach(cfg); wiphy_unregister(cfg->wiphy); wl_deinit_priv(cfg); + cancel_work_sync(&cfg->escan_timeout_work); brcmf_free_wiphy(cfg->wiphy); kfree(cfg); } -- GitLab From e144e47703ce67f5f467cdc6c8c43157bcd8b4d1 Mon Sep 17 00:00:00 2001 From: Hidenori Kobayashi Date: Tue, 9 Jan 2024 17:09:09 +0900 Subject: [PATCH 1129/1418] media: staging: ipu3-imgu: Set fields before media_entity_pads_init() [ Upstream commit 87318b7092670d4086bfec115a0280a60c51c2dd ] The imgu driver fails to probe with the following message because it does not set the pad's flags before calling media_entity_pads_init(). [ 14.596315] ipu3-imgu 0000:00:05.0: failed initialize subdev media entity (-22) [ 14.596322] ipu3-imgu 0000:00:05.0: failed to register subdev0 ret (-22) [ 14.596327] ipu3-imgu 0000:00:05.0: failed to register pipes (-22) [ 14.596331] ipu3-imgu 0000:00:05.0: failed to create V4L2 devices (-22) Fix the initialization order so that the driver probe succeeds. The ops initialization is also moved together for readability. Fixes: a0ca1627b450 ("media: staging/intel-ipu3: Add v4l2 driver based on media framework") Cc: # 6.7 Cc: Dan Carpenter Signed-off-by: Hidenori Kobayashi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/ipu3/ipu3-v4l2.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index e530767e80a5d..55cc44a401bc4 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -1069,6 +1069,11 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, struct imgu_media_pipe *imgu_pipe = &imgu->imgu_pipe[pipe]; /* Initialize subdev media entity */ + imgu_sd->subdev.entity.ops = &imgu_media_ops; + for (i = 0; i < IMGU_NODE_NUM; i++) { + imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? + MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; + } r = media_entity_pads_init(&imgu_sd->subdev.entity, IMGU_NODE_NUM, imgu_sd->subdev_pads); if (r) { @@ -1076,11 +1081,6 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, "failed initialize subdev media entity (%d)\n", r); return r; } - imgu_sd->subdev.entity.ops = &imgu_media_ops; - for (i = 0; i < IMGU_NODE_NUM; i++) { - imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? - MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; - } /* Initialize subdev */ v4l2_subdev_init(&imgu_sd->subdev, &imgu_subdev_ops); @@ -1177,15 +1177,15 @@ static int imgu_v4l2_node_setup(struct imgu_device *imgu, unsigned int pipe, } /* Initialize media entities */ + node->vdev_pad.flags = node->output ? + MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; + vdev->entity.ops = NULL; r = media_entity_pads_init(&vdev->entity, 1, &node->vdev_pad); if (r) { dev_err(dev, "failed initialize media entity (%d)\n", r); mutex_destroy(&node->lock); return r; } - node->vdev_pad.flags = node->output ? - MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; - vdev->entity.ops = NULL; /* Initialize vbq */ vbq->type = node->vdev_fmt.type; -- GitLab From 5166fc45d7eafbbe47417c5effb47191c08f33ab Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Mon, 18 Dec 2023 19:32:36 +0530 Subject: [PATCH 1130/1418] arm64: dts: qcom: sc7280: Add additional MSI interrupts [ Upstream commit b8ba66b40da3230a8675cb5dd5c2dea5bce24d62 ] Current MSI's mapping doesn't have all the vectors. This platform supports 8 vectors each vector supports 32 MSI's, so total MSI's supported is 256. Add all the MSI groups supported for this PCIe instance in this platform. Fixes: 92e0ee9f83b3 ("arm64: dts: qcom: sc7280: Add PCIe and PHY related nodes") cc: stable@vger.kernel.org Signed-off-by: Krishna chaitanya chundru Link: https://lore.kernel.org/r/20231218-additional_msi-v1-1-de6917392684@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 04106d7254000..b5cd24d59ad9a 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2028,8 +2028,16 @@ ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - interrupts = ; - interrupt-names = "msi"; + interrupts = , + , + , + , + , + , + , + ; + interrupt-names = "msi0", "msi1", "msi2", "msi3", + "msi4", "msi5", "msi6", "msi7"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>, -- GitLab From 9875deeefedfbfba08d4f261a2237549b1f918a8 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Sun, 17 Dec 2023 13:36:59 +0800 Subject: [PATCH 1131/1418] remoteproc: virtio: Fix wdg cannot recovery remote processor [ Upstream commit b327c72753d6a78de37aed6c35756f2ef62897ee ] Recovery remote processor failed when wdg irq received: [ 0.842574] remoteproc remoteproc0: crash detected in cix-dsp-rproc: type watchdog [ 0.842750] remoteproc remoteproc0: handling crash #1 in cix-dsp-rproc [ 0.842824] remoteproc remoteproc0: recovering cix-dsp-rproc [ 0.843342] remoteproc remoteproc0: stopped remote processor cix-dsp-rproc [ 0.847901] rproc-virtio rproc-virtio.0.auto: Failed to associate buffer [ 0.847979] remoteproc remoteproc0: failed to probe subdevices for cix-dsp-rproc: -16 The reason is that dma coherent mem would not be released when recovering the remote processor, due to rproc_virtio_remove() would not be called, where the mem released. It will fail when it try to allocate and associate buffer again. Releasing reserved memory from rproc_virtio_dev_release(), instead of rproc_virtio_remove(). Fixes: 1d7b61c06dc3 ("remoteproc: virtio: Create platform device for the remoteproc_virtio") Signed-off-by: Joakim Zhang Acked-by: Arnaud Pouliquen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231217053659.3245745-1-joakim.zhang@cixtech.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/remoteproc_virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 0e95525c11581..ab5e4f02ab225 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -351,6 +351,9 @@ static void rproc_virtio_dev_release(struct device *dev) kfree(vdev); + of_reserved_mem_device_release(&rvdev->pdev->dev); + dma_release_coherent_memory(&rvdev->pdev->dev); + put_device(&rvdev->pdev->dev); } @@ -584,9 +587,6 @@ static int rproc_virtio_remove(struct platform_device *pdev) rproc_remove_subdev(rproc, &rvdev->subdev); rproc_remove_rvdev(rvdev); - of_reserved_mem_device_release(&pdev->dev); - dma_release_coherent_memory(&pdev->dev); - put_device(&rproc->dev); return 0; -- GitLab From a881dd09fd9920dec3dd5b3627837bd52206a4bf Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 23 Jan 2024 11:58:14 +0530 Subject: [PATCH 1132/1418] clk: qcom: gcc-sdm845: Add soft dependency on rpmhpd [ Upstream commit 1d9054e3a4fd36e2949e616f7360bdb81bcc1921 ] With the addition of RPMh power domain to the GCC node in device tree, we noticed a significant delay in getting the UFS driver probed on AOSP which futher led to mount failures because Android do not support rootwait. So adding a soft dependency on RPMh power domain which informs modprobe to load rpmhpd module before gcc-sdm845. Cc: stable@vger.kernel.org # v5.4+ Fixes: 4b6ea15c0a11 ("arm64: dts: qcom: sdm845: Add missing RPMh power domain to GCC") Suggested-by: Manivannan Sadhasivam Signed-off-by: Amit Pundir Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240123062814.2555649-1-amit.pundir@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-sdm845.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index 6af08e0ca8475..ef15e8f114027 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -4038,3 +4038,4 @@ module_exit(gcc_sdm845_exit); MODULE_DESCRIPTION("QTI GCC SDM845 Driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:gcc-sdm845"); +MODULE_SOFTDEP("pre: rpmhpd"); -- GitLab From d4b1c3b5ec1c14b5e89f4fd849cda8b5ab85f0fe Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 16 Nov 2023 10:01:21 +0100 Subject: [PATCH 1133/1418] smack: Set SMACK64TRANSMUTE only for dirs in smack_inode_setxattr() [ Upstream commit 9c82169208dde516510aaba6bbd8b13976690c5d ] Since the SMACK64TRANSMUTE xattr makes sense only for directories, enforce this restriction in smack_inode_setxattr(). Cc: stable@vger.kernel.org Fixes: 5c6d1125f8db ("Smack: Transmute labels on specified directories") # v2.6.38.x Signed-off-by: Roberto Sassu Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index fbadc61feedd1..07f7351148ecf 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1309,7 +1309,8 @@ static int smack_inode_setxattr(struct user_namespace *mnt_userns, check_star = 1; } else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) { check_priv = 1; - if (size != TRANS_TRUE_SIZE || + if (!S_ISDIR(d_backing_inode(dentry)->i_mode) || + size != TRANS_TRUE_SIZE || strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) rc = -EINVAL; } else -- GitLab From dee9c25865550782e831cb8d6e80dd29817078ac Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 16 Nov 2023 10:01:22 +0100 Subject: [PATCH 1134/1418] smack: Handle SMACK64TRANSMUTE in smack_inode_setsecurity() [ Upstream commit ac02f007d64eb2769d0bde742aac4d7a5fc6e8a5 ] If the SMACK64TRANSMUTE xattr is provided, and the inode is a directory, update the in-memory inode flags by setting SMK_INODE_TRANSMUTE. Cc: stable@vger.kernel.org Fixes: 5c6d1125f8db ("Smack: Transmute labels on specified directories") # v2.6.38.x Signed-off-by: Roberto Sassu Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 07f7351148ecf..feba69549d086 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2783,6 +2783,15 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, if (value == NULL || size > SMK_LONGLABEL || size == 0) return -EINVAL; + if (strcmp(name, XATTR_SMACK_TRANSMUTE) == 0) { + if (!S_ISDIR(inode->i_mode) || size != TRANS_TRUE_SIZE || + strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) + return -EINVAL; + + nsp->smk_flags |= SMK_INODE_TRANSMUTE; + return 0; + } + skp = smk_import_entry(value, size); if (IS_ERR(skp)) return PTR_ERR(skp); -- GitLab From 74cd997ba17bdc4a24ecaff608304ea93db0b9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Thu, 25 Jan 2024 19:39:32 +0100 Subject: [PATCH 1135/1418] arm: dts: marvell: Fix maxium->maxim typo in brownstone dts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 831e0cd4f9ee15a4f02ae10b67e7fdc10eb2b4fc ] Fix an obvious spelling error in the PMIC compatible in the MMP2 Brownstone DTS file. Fixes: 58f1193e6210 ("mfd: max8925: Add dts") Cc: Signed-off-by: Duje Mihanović Reported-by: Krzysztof Kozlowski Closes: https://lore.kernel.org/linux-devicetree/1410884282-18041-1-git-send-email-k.kozlowski@samsung.com/ Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240125-brownstone-typo-fix-v2-1-45bc48a0c81c@skole.hr [krzysztof: Just 10 years to take a patch, not bad! Rephrased commit msg] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/mmp2-brownstone.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts index 04f1ae1382e7a..bc64348b82185 100644 --- a/arch/arm/boot/dts/mmp2-brownstone.dts +++ b/arch/arm/boot/dts/mmp2-brownstone.dts @@ -28,7 +28,7 @@ &twsi1 { status = "okay"; pmic: max8925@3c { - compatible = "maxium,max8925"; + compatible = "maxim,max8925"; reg = <0x3c>; interrupts = <1>; interrupt-parent = <&intcmux4>; -- GitLab From 07c3fe923ff7eccf684fb4f8c953d0a7cc8ded73 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 10 Jan 2024 15:03:05 -0500 Subject: [PATCH 1136/1418] drm/vmwgfx: Fix possible null pointer derefence with invalid contexts [ Upstream commit 517621b7060096e48e42f545fa6646fc00252eac ] vmw_context_cotable can return either an error or a null pointer and its usage sometimes went unchecked. Subsequent code would then try to access either a null pointer or an error value. The invalid dereferences were only possible with malformed userspace apps which never properly initialized the rendering contexts. Check the results of vmw_context_cotable to fix the invalid derefs. Thanks: ziming zhang(@ezrak1e) from Ant Group Light-Year Security Lab who was the first person to discover it. Niels De Graef who reported it and helped to track down the poc. Fixes: 9c079b8ce8bf ("drm/vmwgfx: Adapt execbuf to the new validation api") Cc: # v4.20+ Reported-by: Niels De Graef Signed-off-by: Zack Rusin Cc: Martin Krastev Cc: Maaz Mombasawala Cc: Ian Forbes Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Reviewed-by: Maaz Mombasawala Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240110200305.94086-1-zack.rusin@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index bc7f02e4ecebb..2f7ac91149fc0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -447,7 +447,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, vmw_res_type(ctx) == vmw_res_dx_context) { for (i = 0; i < cotable_max; ++i) { res = vmw_context_cotable(ctx, i); - if (IS_ERR(res)) + if (IS_ERR_OR_NULL(res)) continue; ret = vmw_execbuf_res_val_add(sw_context, res, @@ -1259,6 +1259,8 @@ static int vmw_cmd_dx_define_query(struct vmw_private *dev_priv, return -EINVAL; cotable_res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXQUERY); + if (IS_ERR_OR_NULL(cotable_res)) + return cotable_res ? PTR_ERR(cotable_res) : -EINVAL; ret = vmw_cotable_notify(cotable_res, cmd->body.queryId); return ret; @@ -2477,6 +2479,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, return ret; res = vmw_context_cotable(ctx_node->ctx, vmw_view_cotables[view_type]); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->defined_id); if (unlikely(ret != 0)) return ret; @@ -2562,8 +2566,8 @@ static int vmw_cmd_dx_so_define(struct vmw_private *dev_priv, so_type = vmw_so_cmd_to_type(header->id); res = vmw_context_cotable(ctx_node->ctx, vmw_so_cotables[so_type]); - if (IS_ERR(res)) - return PTR_ERR(res); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cotable_notify(res, cmd->defined_id); @@ -2682,6 +2686,8 @@ static int vmw_cmd_dx_define_shader(struct vmw_private *dev_priv, return -EINVAL; res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXSHADER); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.shaderId); if (ret) return ret; @@ -3003,6 +3009,8 @@ static int vmw_cmd_dx_define_streamoutput(struct vmw_private *dev_priv, } res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_STREAMOUTPUT); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.soid); if (ret) return ret; -- GitLab From 12609c76b755dbeb1645c0aacc0f0f4743b2eff3 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 18 Jan 2024 10:21:57 -0500 Subject: [PATCH 1137/1418] serial: max310x: fix NULL pointer dereference in I2C instantiation [ Upstream commit 0d27056c24efd3d63a03f3edfbcfc4827086b110 ] When trying to instantiate a max14830 device from userspace: echo max14830 0x60 > /sys/bus/i2c/devices/i2c-2/new_device we get the following error: Unable to handle kernel NULL pointer dereference at virtual address... ... Call trace: max310x_i2c_probe+0x48/0x170 [max310x] i2c_device_probe+0x150/0x2a0 ... Add check for validity of devtype to prevent the error, and abort probe with a meaningful error message. Fixes: 2e1f2d9a9bdb ("serial: max310x: implement I2C support") Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240118152213.2644269-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 444f89eb2d4b7..d409ef3887212 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1633,13 +1633,16 @@ static unsigned short max310x_i2c_slave_addr(unsigned short addr, static int max310x_i2c_probe(struct i2c_client *client) { - const struct max310x_devtype *devtype = - device_get_match_data(&client->dev); + const struct max310x_devtype *devtype; struct i2c_client *port_client; struct regmap *regmaps[4]; unsigned int i; u8 port_addr; + devtype = device_get_match_data(&client->dev); + if (!devtype) + return dev_err_probe(&client->dev, -ENODEV, "Failed to match device\n"); + if (client->addr < devtype->slave_addr.min || client->addr > devtype->slave_addr.max) return dev_err_probe(&client->dev, -EINVAL, -- GitLab From 6d21d0356aa44157a62e39c0d1a13d4c69a8d0c8 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 31 Jan 2024 10:00:20 +0100 Subject: [PATCH 1138/1418] pci_iounmap(): Fix MMIO mapping leak [ Upstream commit 7626913652cc786c238e2dd7d8740b17d41b2637 ] The #ifdef ARCH_HAS_GENERIC_IOPORT_MAP accidentally also guards iounmap(), which means MMIO mappings are leaked. Move the guard so we call iounmap() for MMIO mappings. Fixes: 316e8d79a095 ("pci_iounmap'2: Electric Boogaloo: try to make sense of it all") Link: https://lore.kernel.org/r/20240131090023.12331-2-pstanner@redhat.com Reported-by: Danilo Krummrich Suggested-by: Arnd Bergmann Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Arnd Bergmann Cc: # v5.15+ Signed-off-by: Sasha Levin --- lib/pci_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index ce39ce9f3526e..2829ddb0e316b 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -170,8 +170,8 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *p) if (addr >= start && addr < start + IO_SPACE_LIMIT) return; - iounmap(p); #endif + iounmap(p); } EXPORT_SYMBOL(pci_iounmap); -- GitLab From 09c1be4d581d3356159abcc5a8a7a6c5f1bf1e77 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 22 Dec 2023 13:50:30 +0800 Subject: [PATCH 1139/1418] media: xc4000: Fix atomicity violation in xc4000_get_frequency [ Upstream commit 36d503ad547d1c75758a6fcdbec2806f1b6aeb41 ] In xc4000_get_frequency(): *freq = priv->freq_hz + priv->freq_offset; The code accesses priv->freq_hz and priv->freq_offset without holding any lock. In xc4000_set_params(): // Code that updates priv->freq_hz and priv->freq_offset ... xc4000_get_frequency() and xc4000_set_params() may execute concurrently, risking inconsistent reads of priv->freq_hz and priv->freq_offset. Since these related data may update during reading, it can result in incorrect frequency calculation, leading to atomicity violations. This possible bug is found by an experimental static analysis tool developed by our team, BassCheck[1]. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 6.2. To address this issue, it is proposed to add a mutex lock pair in xc4000_get_frequency() to ensure atomicity. With this patch applied, our tool no longer reports the possible bug, with the kernel configuration allyesconfig for x86_64. Due to the lack of associated hardware, we cannot test the patch in runtime testing, and just verify it according to the code logic. [1] https://sites.google.com/view/basscheck/ Fixes: 4c07e32884ab ("[media] xc4000: Fix get_frequency()") Cc: stable@vger.kernel.org Reported-by: BassCheck Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/tuners/xc4000.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index 57ded9ff3f043..29bc63021c5aa 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -1515,10 +1515,10 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc4000_priv *priv = fe->tuner_priv; + mutex_lock(&priv->lock); *freq = priv->freq_hz + priv->freq_offset; if (debug) { - mutex_lock(&priv->lock); if ((priv->cur_fw.type & (BASE | FM | DTV6 | DTV7 | DTV78 | DTV8)) == BASE) { u16 snr = 0; @@ -1529,8 +1529,8 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) return 0; } } - mutex_unlock(&priv->lock); } + mutex_unlock(&priv->lock); dprintk(1, "%s()\n", __func__); -- GitLab From 1025ff4e6a7abaa1070414f0b05b0b13df1a194d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Jan 2024 15:55:40 +0200 Subject: [PATCH 1140/1418] media: mc: Add local pad to pipeline regardless of the link state [ Upstream commit 78f0daa026d4c5e192d31801d1be6caf88250220 ] When building pipelines by following links, the media_pipeline_explore_next_link() function only traverses enabled links. The remote pad of a disabled link is not added to the pipeline, and neither is the local pad. While the former is correct as disabled links should not be followed, not adding the local pad breaks processing of the MEDIA_PAD_FL_MUST_CONNECT flag. The MEDIA_PAD_FL_MUST_CONNECT flag is checked in the __media_pipeline_start() function that iterates over all pads after populating the pipeline. If the pad is not present, the check gets skipped, rendering it useless. Fix this by adding the local pad of all links regardless of their state, only skipping the remote pad for disabled links. Cc: stable@vger.kernel.org # 6.1 Fixes: ae219872834a ("media: mc: entity: Rewrite media_pipeline_start()") Reported-by: Frieder Schrempf Closes: https://lore.kernel.org/linux-media/7658a15a-80c5-219f-2477-2a94ba6c6ba1@kontron.de Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index f268cf66053e1..20a2630455f2c 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -594,13 +594,6 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); - /* Skip links that are not enabled. */ - if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { - dev_dbg(walk->mdev->dev, - "media pipeline: skipping link (disabled)\n"); - return 0; - } - /* Get the local pad and remote pad. */ if (link->source->entity == pad->entity) { local = link->source; @@ -622,13 +615,20 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, } /* - * Add the local and remote pads of the link to the pipeline and push - * them to the stack, if they're not already present. + * Add the local pad of the link to the pipeline and push it to the + * stack, if not already present. */ ret = media_pipeline_add_pad(pipe, walk, local); if (ret) return ret; + /* Similarly, add the remote pad, but only if the link is enabled. */ + if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { + dev_dbg(walk->mdev->dev, + "media pipeline: skipping link (disabled)\n"); + return 0; + } + ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; -- GitLab From f29793c33984f4d41448ba55da7432b153dabd89 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 00:24:12 +0200 Subject: [PATCH 1141/1418] media: mc: Fix flags handling when creating pad links [ Upstream commit 422f7af75d03d50895938d38bc9cb8be759c440f ] The media_create_pad_link() function doesn't correctly clear reject link type flags, nor does it set the DATA_LINK flag. It only works because the MEDIA_LNK_FL_DATA_LINK flag's value is 0. Fix it by returning an error if any link type flag is set. This doesn't introduce any regression, as nobody calls the media_create_pad_link() function with link type flags (easily checked by grepping for the flag in the source code, there are very few hits). Set the MEDIA_LNK_FL_DATA_LINK explicitly, which is a no-op that the compiler will optimize out, but is still useful to make the code more explicit and easier to understand. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 20a2630455f2c..688780c8734d4 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -1017,6 +1017,11 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_link *link; struct media_link *backlink; + if (flags & MEDIA_LNK_FL_LINK_TYPE) + return -EINVAL; + + flags |= MEDIA_LNK_FL_DATA_LINK; + if (WARN_ON(!source || !sink) || WARN_ON(source_pad >= source->num_pads) || WARN_ON(sink_pad >= sink->num_pads)) @@ -1032,7 +1037,7 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, link->source = &source->pads[source_pad]; link->sink = &sink->pads[sink_pad]; - link->flags = flags & ~MEDIA_LNK_FL_INTERFACE_LINK; + link->flags = flags; /* Initialize graph object embedded at the new link */ media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK, -- GitLab From cff51913c5cc42b45ea010e91eb0c1e349d4292f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 00:30:02 +0200 Subject: [PATCH 1142/1418] media: mc: Add num_links flag to media_pad [ Upstream commit baeddf94aa61879b118f2faa37ed126d772670cc ] Maintain a counter of the links connected to a pad in the media_pad structure. This helps checking if a pad is connected to anything, which will be used in the pipeline building code. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 6 ++++++ include/media/media-entity.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 688780c8734d4..c7cb49205b017 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -957,6 +957,9 @@ static void __media_entity_remove_link(struct media_entity *entity, /* Remove the reverse links for a data link. */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) { + link->source->num_links--; + link->sink->num_links--; + if (link->source->entity == entity) remote = link->sink->entity; else @@ -1068,6 +1071,9 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, sink->num_links++; source->num_links++; + link->source->num_links++; + link->sink->num_links++; + return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 28c9de8a1f348..03bb0963942bd 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -205,6 +205,7 @@ enum media_pad_signal_type { * @graph_obj: Embedded structure containing the media object common data * @entity: Entity this pad belongs to * @index: Pad index in the entity pads array, numbered from 0 to n + * @num_links: Number of links connected to this pad * @sig_type: Type of the signal inside a media pad * @flags: Pad flags, as defined in * :ref:`include/uapi/linux/media.h ` @@ -216,6 +217,7 @@ struct media_pad { struct media_gobj graph_obj; /* must be first field in struct */ struct media_entity *entity; u16 index; + u16 num_links; enum media_pad_signal_type sig_type; unsigned long flags; -- GitLab From 61656ca92ea9391ed1ea9da0fc38205a8e22c230 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 00:30:02 +0200 Subject: [PATCH 1143/1418] media: mc: Rename pad variable to clarify intent [ Upstream commit 9ec9109cf9f611e3ec9ed0355afcc7aae5e73176 ] The pad local variable in the media_pipeline_explore_next_link() function is used to store the pad through which the entity has been reached. Rename it to origin to reflect that and make the code easier to read. This will be even more important in subsequent commits when expanding the function with additional logic. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index c7cb49205b017..50b68b4dde5d0 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -579,13 +579,13 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry = media_pipeline_walk_top(walk); - struct media_pad *pad; + struct media_pad *origin; struct media_link *link; struct media_pad *local; struct media_pad *remote; int ret; - pad = entry->pad; + origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); media_pipeline_walk_pop(walk); @@ -595,7 +595,7 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, link->sink->entity->name, link->sink->index); /* Get the local pad and remote pad. */ - if (link->source->entity == pad->entity) { + if (link->source->entity == origin->entity) { local = link->source; remote = link->sink; } else { @@ -607,8 +607,9 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, * Skip links that originate from a different pad than the incoming pad * that is not connected internally in the entity to the incoming pad. */ - if (pad != local && - !media_entity_has_pad_interdep(pad->entity, pad->index, local->index)) { + if (origin != local && + !media_entity_has_pad_interdep(origin->entity, origin->index, + local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); return 0; -- GitLab From e2c545b841a7ae2c1796c9968fcf47166075cfb1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 01:04:52 +0200 Subject: [PATCH 1144/1418] media: mc: Expand MUST_CONNECT flag to always require an enabled link [ Upstream commit b3decc5ce7d778224d266423b542326ad469cb5f ] The MEDIA_PAD_FL_MUST_CONNECT flag indicates that the pad requires an enabled link to stream, but only if it has any link at all. This makes little sense, as if a pad is part of a pipeline, there are very few use cases for an active link to be mandatory only if links exist at all. A review of in-tree drivers confirms they all need an enabled link for pads marked with the MEDIA_PAD_FL_MUST_CONNECT flag. Expand the scope of the flag by rejecting pads that have no links at all. This requires modifying the pipeline build code to add those pads to the pipeline. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../media/mediactl/media-types.rst | 11 ++-- drivers/media/mc/mc-entity.c | 53 +++++++++++++++---- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/Documentation/userspace-api/media/mediactl/media-types.rst b/Documentation/userspace-api/media/mediactl/media-types.rst index 0ffeece1e0c8e..6332e8395263b 100644 --- a/Documentation/userspace-api/media/mediactl/media-types.rst +++ b/Documentation/userspace-api/media/mediactl/media-types.rst @@ -375,12 +375,11 @@ Types and flags used to represent the media graph elements are origins of links. * - ``MEDIA_PAD_FL_MUST_CONNECT`` - - If this flag is set and the pad is linked to any other pad, then - at least one of those links must be enabled for the entity to be - able to stream. There could be temporary reasons (e.g. device - configuration dependent) for the pad to need enabled links even - when this flag isn't set; the absence of the flag doesn't imply - there is none. + - If this flag is set, then for this pad to be able to stream, it must + be connected by at least one enabled link. There could be temporary + reasons (e.g. device configuration dependent) for the pad to need + enabled links even when this flag isn't set; the absence of the flag + doesn't imply there is none. One and only one of ``MEDIA_PAD_FL_SINK`` and ``MEDIA_PAD_FL_SOURCE`` diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 50b68b4dde5d0..8919df09e3e8d 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -509,14 +509,15 @@ static int media_pipeline_walk_push(struct media_pipeline_walk *walk, /* * Move the top entry link cursor to the next link. If all links of the entry - * have been visited, pop the entry itself. + * have been visited, pop the entry itself. Return true if the entry has been + * popped. */ -static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) +static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry; if (WARN_ON(walk->stack.top < 0)) - return; + return false; entry = media_pipeline_walk_top(walk); @@ -526,7 +527,7 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) walk->stack.top); walk->stack.top--; - return; + return true; } entry->links = entry->links->next; @@ -534,6 +535,8 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) dev_dbg(walk->mdev->dev, "media pipeline: moved entry %u to next link\n", walk->stack.top); + + return false; } /* Free all memory allocated while walking the pipeline. */ @@ -583,11 +586,12 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_link *link; struct media_pad *local; struct media_pad *remote; + bool last_link; int ret; origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); - media_pipeline_walk_pop(walk); + last_link = media_pipeline_walk_pop(walk); dev_dbg(walk->mdev->dev, "media pipeline: exploring link '%s':%u -> '%s':%u\n", @@ -612,7 +616,7 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); - return 0; + goto done; } /* @@ -627,13 +631,44 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (disabled)\n"); - return 0; + goto done; } ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; +done: + /* + * If we're done iterating over links, iterate over pads of the entity. + * This is necessary to discover pads that are not connected with any + * link. Those are dead ends from a pipeline exploration point of view, + * but are still part of the pipeline and need to be added to enable + * proper validation. + */ + if (!last_link) + return 0; + + dev_dbg(walk->mdev->dev, + "media pipeline: adding unconnected pads of '%s'\n", + local->entity->name); + + media_entity_for_each_pad(origin->entity, local) { + /* + * Skip the origin pad (already handled), pad that have links + * (already discovered through iterating over links) and pads + * not internally connected. + */ + if (origin == local || !local->num_links || + !media_entity_has_pad_interdep(origin->entity, origin->index, + local->index)) + continue; + + ret = media_pipeline_add_pad(pipe, walk, local); + if (ret) + return ret; + } + return 0; } @@ -745,7 +780,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pad *pad = ppad->pad; struct media_entity *entity = pad->entity; bool has_enabled_link = false; - bool has_link = false; struct media_link *link; dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name, @@ -775,7 +809,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad, /* Record if the pad has links and enabled links. */ if (link->flags & MEDIA_LNK_FL_ENABLED) has_enabled_link = true; - has_link = true; /* * Validate the link if it's enabled and has the @@ -813,7 +846,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad, * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set, * ensure that it has either no link or an enabled link. */ - if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && has_link && + if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && !has_enabled_link) { dev_dbg(mdev->dev, "Pad '%s':%u must be connected by an enabled link\n", -- GitLab From b54478d20375874aeee257744dedfd3e413432ff Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Jan 2024 17:15:30 -0800 Subject: [PATCH 1145/1418] KVM: Always flush async #PF workqueue when vCPU is being destroyed [ Upstream commit 3d75b8aa5c29058a512db29da7cbee8052724157 ] Always flush the per-vCPU async #PF workqueue when a vCPU is clearing its completion queue, e.g. when a VM and all its vCPUs is being destroyed. KVM must ensure that none of its workqueue callbacks is running when the last reference to the KVM _module_ is put. Gifting a reference to the associated VM prevents the workqueue callback from dereferencing freed vCPU/VM memory, but does not prevent the KVM module from being unloaded before the callback completes. Drop the misguided VM refcount gifting, as calling kvm_put_kvm() from async_pf_execute() if kvm_put_kvm() flushes the async #PF workqueue will result in deadlock. async_pf_execute() can't return until kvm_put_kvm() finishes, and kvm_put_kvm() can't return until async_pf_execute() finishes: WARNING: CPU: 8 PID: 251 at virt/kvm/kvm_main.c:1435 kvm_put_kvm+0x2d/0x320 [kvm] Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel kvm irqbypass CPU: 8 PID: 251 Comm: kworker/8:1 Tainted: G W 6.6.0-rc1-e7af8d17224a-x86/gmem-vm #119 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Workqueue: events async_pf_execute [kvm] RIP: 0010:kvm_put_kvm+0x2d/0x320 [kvm] Call Trace: async_pf_execute+0x198/0x260 [kvm] process_one_work+0x145/0x2d0 worker_thread+0x27e/0x3a0 kthread+0xba/0xe0 ret_from_fork+0x2d/0x50 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- INFO: task kworker/8:1:251 blocked for more than 120 seconds. Tainted: G W 6.6.0-rc1-e7af8d17224a-x86/gmem-vm #119 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/8:1 state:D stack:0 pid:251 ppid:2 flags:0x00004000 Workqueue: events async_pf_execute [kvm] Call Trace: __schedule+0x33f/0xa40 schedule+0x53/0xc0 schedule_timeout+0x12a/0x140 __wait_for_common+0x8d/0x1d0 __flush_work.isra.0+0x19f/0x2c0 kvm_clear_async_pf_completion_queue+0x129/0x190 [kvm] kvm_arch_destroy_vm+0x78/0x1b0 [kvm] kvm_put_kvm+0x1c1/0x320 [kvm] async_pf_execute+0x198/0x260 [kvm] process_one_work+0x145/0x2d0 worker_thread+0x27e/0x3a0 kthread+0xba/0xe0 ret_from_fork+0x2d/0x50 ret_from_fork_asm+0x11/0x20 If kvm_clear_async_pf_completion_queue() actually flushes the workqueue, then there's no need to gift async_pf_execute() a reference because all invocations of async_pf_execute() will be forced to complete before the vCPU and its VM are destroyed/freed. And that in turn fixes the module unloading bug as __fput() won't do module_put() on the last vCPU reference until the vCPU has been freed, e.g. if closing the vCPU file also puts the last reference to the KVM module. Note that kvm_check_async_pf_completion() may also take the work item off the completion queue and so also needs to flush the work queue, as the work will not be seen by kvm_clear_async_pf_completion_queue(). Waiting on the workqueue could theoretically delay a vCPU due to waiting for the work to complete, but that's a very, very small chance, and likely a very small delay. kvm_arch_async_page_present_queued() unconditionally makes a new request, i.e. will effectively delay entering the guest, so the remaining work is really just: trace_kvm_async_pf_completed(addr, cr2_or_gpa); __kvm_vcpu_wake_up(vcpu); mmput(mm); and mmput() can't drop the last reference to the page tables if the vCPU is still alive, i.e. the vCPU won't get stuck tearing down page tables. Add a helper to do the flushing, specifically to deal with "wakeup all" work items, as they aren't actually work items, i.e. are never placed in a workqueue. Trying to flush a bogus workqueue entry rightly makes __flush_work() complain (kudos to whoever added that sanity check). Note, commit 5f6de5cbebee ("KVM: Prevent module exit until all VMs are freed") *tried* to fix the module refcounting issue by having VMs grab a reference to the module, but that only made the bug slightly harder to hit as it gave async_pf_execute() a bit more time to complete before the KVM module could be unloaded. Fixes: af585b921e5d ("KVM: Halt vcpu if page it tries to access is swapped out") Cc: stable@vger.kernel.org Cc: David Matlack Reviewed-by: Xu Yilun Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240110011533.503302-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- virt/kvm/async_pf.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 9bfe1d6f6529a..adaf6f141804f 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -88,7 +88,27 @@ static void async_pf_execute(struct work_struct *work) __kvm_vcpu_wake_up(vcpu); mmput(mm); - kvm_put_kvm(vcpu->kvm); +} + +static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work) +{ + /* + * The async #PF is "done", but KVM must wait for the work item itself, + * i.e. async_pf_execute(), to run to completion. If KVM is a module, + * KVM must ensure *no* code owned by the KVM (the module) can be run + * after the last call to module_put(). Note, flushing the work item + * is always required when the item is taken off the completion queue. + * E.g. even if the vCPU handles the item in the "normal" path, the VM + * could be terminated before async_pf_execute() completes. + * + * Wake all events skip the queue and go straight done, i.e. don't + * need to be flushed (but sanity check that the work wasn't queued). + */ + if (work->wakeup_all) + WARN_ON_ONCE(work->work.func); + else + flush_work(&work->work); + kmem_cache_free(async_pf_cache, work); } void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) @@ -115,7 +135,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) #else if (cancel_work_sync(&work->work)) { mmput(work->mm); - kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } #endif @@ -127,7 +146,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_first_entry(&vcpu->async_pf.done, typeof(*work), link); list_del(&work->link); - kmem_cache_free(async_pf_cache, work); + + spin_unlock(&vcpu->async_pf.lock); + kvm_flush_and_free_async_pf_work(work); + spin_lock(&vcpu->async_pf.lock); } spin_unlock(&vcpu->async_pf.lock); @@ -152,7 +174,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->queue); vcpu->async_pf.queued--; - kmem_cache_free(async_pf_cache, work); + kvm_flush_and_free_async_pf_work(work); } } @@ -187,7 +209,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, work->arch = *arch; work->mm = current->mm; mmget(work->mm); - kvm_get_kvm(work->vcpu->kvm); INIT_WORK(&work->work, async_pf_execute); -- GitLab From 14cdbd9440a4c23f1347d7e97877953c9a5ab20c Mon Sep 17 00:00:00 2001 From: Tor Vic Date: Fri, 9 Feb 2024 16:42:26 +0100 Subject: [PATCH 1146/1418] cpufreq: amd-pstate: Fix min_perf assignment in amd_pstate_adjust_perf() [ Upstream commit b26ffbf800ae3c8d01bdf90d9cd8a37e1606ff06 ] In the function amd_pstate_adjust_perf(), the 'min_perf' variable is set to 'highest_perf' instead of 'lowest_perf'. Fixes: 1d215f0319c2 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State") Reported-by: Oleksandr Natalenko Reviewed-by: Perry Yuan Signed-off-by: Tor Vic Reviewed-by: Mario Limonciello Cc: 6.1+ # 6.1+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index edc294ee5a5bc..90dcf26f09731 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -320,7 +320,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); -- GitLab From 334fb14389b500a6dbf4e70ab2c3dabdddaa1d70 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Feb 2024 00:14:04 +1100 Subject: [PATCH 1147/1418] powerpc/smp: Adjust nr_cpu_ids to cover all threads of a core [ Upstream commit 5580e96dad5a439d561d9648ffcbccb739c2a120 ] If nr_cpu_ids is too low to include at least all the threads of a single core adjust nr_cpu_ids upwards. This avoids triggering odd bugs in code that assumes all threads of a core are available. Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://msgid.link/20231229120107.2281153-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/kernel/prom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8537c354c560b..a64f4fb332893 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -369,6 +369,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (IS_ENABLED(CONFIG_PPC64)) boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); + if (nr_cpu_ids % nthreads != 0) { + set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); + pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n", + nr_cpu_ids); + } + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: -- GitLab From 605ddb3a6e5f2ba5c55b73e62dc2e6f2c127eee4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Feb 2024 00:14:04 +1100 Subject: [PATCH 1148/1418] powerpc/smp: Increase nr_cpu_ids to include the boot CPU [ Upstream commit 777f81f0a9c780a6443bcf2c7785f0cc2e87c1ef ] If nr_cpu_ids is too low to include the boot CPU adjust nr_cpu_ids upward. Otherwise the kernel will BUG when trying to allocate a paca for the boot CPU and fail to boot. Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://msgid.link/20231229120107.2281153-2-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/kernel/prom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a64f4fb332893..9531ab90feb8a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -375,6 +375,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, nr_cpu_ids); } + if (boot_cpuid >= nr_cpu_ids) { + set_nr_cpu_ids(min(CONFIG_NR_CPUS, ALIGN(boot_cpuid + 1, nthreads))); + pr_warn("Boot CPU %d >= nr_cpu_ids, adjusted nr_cpu_ids to %d\n", + boot_cpuid, nr_cpu_ids); + } + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: -- GitLab From 0993f7f85d2650fdb2d91dfd6ffd7af991219be8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 10 Feb 2024 21:28:02 -0800 Subject: [PATCH 1149/1418] sparc64: NMI watchdog: fix return value of __setup handler [ Upstream commit 3ed7c61e49d65dacb96db798c0ab6fcd55a1f20f ] __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from setup_nmi_watchdog(). Fixes: e5553a6d0442 ("sparc64: Implement NMI watchdog on capable cpus.") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Cc: Sam Ravnborg Cc: Andrew Morton Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240211052802.22612-1-rdunlap@infradead.org Signed-off-by: Sasha Levin --- arch/sparc/kernel/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305c..fbf25e926f67c 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -274,7 +274,7 @@ static int __init setup_nmi_watchdog(char *str) if (!strncmp(str, "panic", 5)) panic_on_timeout = 1; - return 0; + return 1; } __setup("nmi_watchdog=", setup_nmi_watchdog); -- GitLab From 8557bef80cc3a3ac3aa2a53118294a024e277852 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 10 Feb 2024 21:28:08 -0800 Subject: [PATCH 1150/1418] sparc: vDSO: fix return value of __setup handler [ Upstream commit 5378f00c935bebb846b1fdb0e79cb76c137c56b5 ] __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from vdso_setup(). Fixes: 9a08862a5d2e ("vDSO for sparc") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Cc: Dan Carpenter Cc: Nick Alcock Cc: Sam Ravnborg Cc: Andrew Morton Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240211052808.22635-1-rdunlap@infradead.org Signed-off-by: Sasha Levin --- arch/sparc/vdso/vma.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index ae9a86cb6f3d9..2b97df0850aa7 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -449,9 +449,8 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); - if (err) - return err; - vdso_enabled = val; - return 0; + if (!err) + vdso_enabled = val; + return 1; } __setup("vdso=", vdso_setup); -- GitLab From efffffde33d64bbed0ccf35224b2e6c4a0923823 Mon Sep 17 00:00:00 2001 From: Svyatoslav Pankratov Date: Mon, 9 Oct 2023 13:27:19 +0100 Subject: [PATCH 1151/1418] crypto: qat - fix double free during reset [ Upstream commit 01aed663e6c421aeafc9c330bda630976b50a764 ] There is no need to free the reset_data structure if the recovery is unsuccessful and the reset is synchronous. The function adf_dev_aer_schedule_reset() handles the cleanup properly. Only asynchronous resets require such structure to be freed inside the reset worker. Fixes: d8cba25d2c68 ("crypto: qat - Intel(R) QAT driver framework") Signed-off-by: Svyatoslav Pankratov Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Stable-dep-of: 7d42e097607c ("crypto: qat - resolve race condition during AER recovery") Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/adf_aer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fe9bb2f3536a9..fa6b7ecd4c08d 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -95,7 +95,8 @@ static void adf_device_reset_worker(struct work_struct *work) if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - kfree(reset_data); + if (reset_data->mode == ADF_DEV_RESET_ASYNC) + kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } -- GitLab From 226fc408c5fcd23cc4186f05ea3a09a7a9aef2f7 Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Fri, 9 Feb 2024 13:43:42 +0100 Subject: [PATCH 1152/1418] crypto: qat - resolve race condition during AER recovery [ Upstream commit 7d42e097607c4d246d99225bf2b195b6167a210c ] During the PCI AER system's error recovery process, the kernel driver may encounter a race condition with freeing the reset_data structure's memory. If the device restart will take more than 10 seconds the function scheduling that restart will exit due to a timeout, and the reset_data structure will be freed. However, this data structure is used for completion notification after the restart is completed, which leads to a UAF bug. This results in a KFENCE bug notice. BUG: KFENCE: use-after-free read in adf_device_reset_worker+0x38/0xa0 [intel_qat] Use-after-free read at 0x00000000bc56fddf (in kfence-#142): adf_device_reset_worker+0x38/0xa0 [intel_qat] process_one_work+0x173/0x340 To resolve this race condition, the memory associated to the container of the work_struct is freed on the worker if the timeout expired, otherwise on the function that schedules the worker. The timeout detection can be done by checking if the caller is still waiting for completion or not by using completion_done() function. Fixes: d8cba25d2c68 ("crypto: qat - Intel(R) QAT driver framework") Cc: Signed-off-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/adf_aer.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fa6b7ecd4c08d..4f36b5a9164a7 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -95,7 +95,8 @@ static void adf_device_reset_worker(struct work_struct *work) if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - if (reset_data->mode == ADF_DEV_RESET_ASYNC) + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; @@ -103,11 +104,19 @@ static void adf_device_reset_worker(struct work_struct *work) adf_dev_restarted_notify(accel_dev); clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); - /* The dev is back alive. Notify the caller if in sync mode */ - if (reset_data->mode == ADF_DEV_RESET_SYNC) - complete(&reset_data->compl); - else + /* + * The dev is back alive. Notify the caller if in sync mode + * + * If device restart will take a more time than expected, + * the schedule_reset() function can timeout and exit. This can be + * detected by calling the completion_done() function. In this case + * the reset_data structure needs to be freed here. + */ + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); + else + complete(&reset_data->compl); } static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, @@ -140,8 +149,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, dev_err(&GET_DEV(accel_dev), "Reset device timeout expired\n"); ret = -EFAULT; + } else { + kfree(reset_data); } - kfree(reset_data); return ret; } return 0; -- GitLab From 81479bf75809808c69bb4beda4f3010700f9a16d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 19 Feb 2024 16:08:02 -0800 Subject: [PATCH 1153/1418] selftests/mqueue: Set timeout to 180 seconds [ Upstream commit 85506aca2eb4ea41223c91c5fe25125953c19b13 ] While mq_perf_tests runs with the default kselftest timeout limit, which is 45 seconds, the test takes about 60 seconds to complete on i3.metal AWS instances. Hence, the test always times out. Increase the timeout to 180 seconds. Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Cc: # 5.4.x Signed-off-by: SeongJae Park Reviewed-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/mqueue/setting | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/mqueue/setting diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting new file mode 100644 index 0000000000000..a953c96aa16e1 --- /dev/null +++ b/tools/testing/selftests/mqueue/setting @@ -0,0 +1 @@ +timeout=180 -- GitLab From 719d7f899df48d662af6c2d36b253934fe9c5040 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 1 Feb 2024 22:18:45 +0800 Subject: [PATCH 1154/1418] ext4: correct best extent lstart adjustment logic [ Upstream commit 4fbf8bc733d14bceb16dda46a3f5e19c6a9621c5 ] When yangerkun review commit 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()"), it was found that the best extent did not completely cover the original request after adjusting the best extent lstart in ext4_mb_new_inode_pa() as follows: original request: 2/10(8) normalized request: 0/64(64) best extent: 0/9(9) When we check if best ex can be kept at start of goal, ac_o_ex.fe_logical is 2 less than the adjusted best extent logical end 9, so we think the adjustment is done. But obviously 0/9(9) doesn't cover 2/10(8), so we should determine here if the original request logical end is less than or equal to the adjusted best extent logical end. In addition, add a comment stating when adjusted best_ex will not cover the original request, and remove the duplicate assertion because adjusting lstart makes no change to b_ex.fe_len. Link: https://lore.kernel.org/r/3630fa7f-b432-7afd-5f79-781bc3b2c5ea@huawei.com Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()") Cc: Signed-off-by: yangerkun Signed-off-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20240201141845.1879253-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6a3e27771df73..bc0ca45a5d817 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4684,10 +4684,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) .fe_len = ac->ac_g_ex.fe_len, }; loff_t orig_goal_end = extent_logical_end(sbi, &ex); + loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex); - /* we can't allocate as much as normalizer wants. - * so, found space must get proper lstart - * to cover original request */ + /* + * We can't allocate as much as normalizer wants, so we try + * to get proper lstart to cover the original request, except + * when the goal doesn't cover the original request as below: + * + * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048 + * best_ex:0/200(200) -> adjusted: 1848/2048(200) + */ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); @@ -4699,7 +4705,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) * 1. Check if best ex can be kept at end of goal and still * cover original start * 2. Else, check if best ex can be kept at start of goal and - * still cover original start + * still cover original end * 3. Else, keep the best ex at start of original request. */ ex.fe_len = ac->ac_b_ex.fe_len; @@ -4709,7 +4715,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) goto adjust_bex; ex.fe_logical = ac->ac_g_ex.fe_logical; - if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) + if (o_ex_end <= extent_logical_end(sbi, &ex)) goto adjust_bex; ex.fe_logical = ac->ac_o_ex.fe_logical; @@ -4717,7 +4723,6 @@ adjust_bex: ac->ac_b_ex.fe_logical = ex.fe_logical; BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); - BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); } -- GitLab From 5ea241b1931f368096856d4d8c6f653392370b17 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 22 Feb 2024 22:17:23 +0900 Subject: [PATCH 1155/1418] block: Clear zone limits for a non-zoned stacked queue [ Upstream commit c8f6f88d25929ad2f290b428efcae3b526f3eab0 ] Device mapper may create a non-zoned mapped device out of a zoned device (e.g., the dm-zoned target). In such case, some queue limit such as the max_zone_append_sectors and zone_write_granularity endup being non zero values for a block device that is not zoned. Avoid this by clearing these limits in blk_stack_limits() when the stacked zoned limit is false. Fixes: 3093a479727b ("block: inherit the zoned characteristics in blk_stack_limits") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240222131724.1803520-1-dlemoal@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-settings.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index bbca4ce77a2d3..c702f408bbc0a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -680,6 +680,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); t->zoned = max(t->zoned, b->zoned); + if (!t->zoned) { + t->zone_write_granularity = 0; + t->max_zone_append_sectors = 0; + } return ret; } EXPORT_SYMBOL(blk_stack_limits); -- GitLab From 03b821647b73b4fd6701b4b4e8936e42aa435959 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:15:52 +0100 Subject: [PATCH 1156/1418] kasan/test: avoid gcc warning for intentional overflow [ Upstream commit e10aea105e9ed14b62a11844fec6aaa87c6935a3 ] The out-of-bounds test allocates an object that is three bytes too short in order to validate the bounds checking. Starting with gcc-14, this causes a compile-time warning as gcc has grown smart enough to understand the sizeof() logic: mm/kasan/kasan_test.c: In function 'kmalloc_oob_16': mm/kasan/kasan_test.c:443:14: error: allocation of insufficient size '13' for type 'struct ' with size '16' [-Werror=alloc-size] 443 | ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); | ^ Hide the actual computation behind a RELOC_HIDE() that ensures the compiler misses the intentional bug. Link: https://lkml.kernel.org/r/20240212111609.869266-1-arnd@kernel.org Fixes: 3f15801cdc23 ("lib: add kasan test module") Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Dmitry Vyukov Cc: Marco Elver Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/kasan/kasan_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 0d59098f08761..cef683a2e0d2e 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -415,7 +415,8 @@ static void kmalloc_oob_16(struct kunit *test) /* This test is specifically crafted for the generic mode. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + /* RELOC_HIDE to prevent gcc from warning about short alloc */ + ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); -- GitLab From 428ca0000f0abd5c99354c52a36becf2b815ca21 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 10 Oct 2023 15:55:49 +0100 Subject: [PATCH 1157/1418] bounds: support non-power-of-two CONFIG_NR_CPUS [ Upstream commit f2d5dcb48f7ba9e3ff249d58fc1fa963d374e66a ] ilog2() rounds down, so for example when PowerPC 85xx sets CONFIG_NR_CPUS to 24, we will only allocate 4 bits to store the number of CPUs instead of 5. Use bits_per() instead, which rounds up. Found by code inspection. The effect of this would probably be a misaccounting when doing NUMA balancing, so to a user, it would only be a performance penalty. The effects may be more wide-spread; it's hard to tell. Link: https://lkml.kernel.org/r/20231010145549.1244748-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Fixes: 90572890d202 ("mm: numa: Change page last {nid,pid} into {cpu,pid}") Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index b529182e8b04f..c5a9fcd2d6228 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN -- GitLab From c8cc05de8e6b5612b6e9f92c385c1a064b0db375 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 5 Feb 2024 13:26:26 +0100 Subject: [PATCH 1158/1418] fat: fix uninitialized field in nostale filehandles [ Upstream commit fde2497d2bc3a063d8af88b258dbadc86bd7b57c ] When fat_encode_fh_nostale() encodes file handle without a parent it stores only first 10 bytes of the file handle. However the length of the file handle must be a multiple of 4 so the file handle is actually 12 bytes long and the last two bytes remain uninitialized. This is not great at we potentially leak uninitialized information with the handle to userspace. Properly initialize the full handle length. Link: https://lkml.kernel.org/r/20240205122626.13701-1-jack@suse.cz Reported-by: syzbot+3ce5dea5b1539ff36769@syzkaller.appspotmail.com Fixes: ea3983ace6b7 ("fat: restructure export_operations") Signed-off-by: Jan Kara Acked-by: OGAWA Hirofumi Cc: Amir Goldstein Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/fat/nfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index af191371c3529..bab63eeaf9cbc 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -130,6 +130,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, fid->parent_i_gen = parent->i_generation; type = FILEID_FAT_WITH_PARENT; *lenp = FAT_FID_SIZE_WITH_PARENT; + } else { + /* + * We need to initialize this field because the fh is actually + * 12 bytes long + */ + fid->parent_i_pos_hi = 0; } return type; -- GitLab From 142d87c958d9454c3cffa625fab56f3016e8f9f3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 24 Jan 2024 17:52:44 +0000 Subject: [PATCH 1159/1418] ubifs: Set page uptodate in the correct place [ Upstream commit 723012cab779eee8228376754e22c6594229bf8f ] Page cache reads are lockless, so setting the freshly allocated page uptodate before we've overwritten it with the data it's supposed to have in it will allow a simultaneous reader to see old data. Move the call to SetPageUptodate into ubifs_write_end(), which is after we copied the new data into the page. Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- fs/ubifs/file.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 10c1779af9c51..f7b1f9ece1364 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -261,9 +261,6 @@ static int write_begin_slow(struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } if (PagePrivate(page)) @@ -462,9 +459,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } err = allocate_budget(c, page, ui, appending); @@ -474,10 +468,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * If we skipped reading the page because we were going to * write all of it, then it is not up to date. */ - if (skipped_read) { + if (skipped_read) ClearPageChecked(page); - ClearPageUptodate(page); - } /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -568,6 +560,9 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, goto out; } + if (len == PAGE_SIZE) + SetPageUptodate(page); + if (!PagePrivate(page)) { attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); -- GitLab From 5ecbc7465f66bb7eb2be2cd423f84000872e3560 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 24 Jan 2024 07:37:02 +0100 Subject: [PATCH 1160/1418] ubi: Check for too small LEB size in VTBL code [ Upstream commit 68a24aba7c593eafa8fd00f2f76407b9b32b47a9 ] If the LEB size is smaller than a volume table record we cannot have volumes. In this case abort attaching. Cc: Chenyuan Yang Cc: stable@vger.kernel.org Fixes: 801c135ce73d ("UBI: Unsorted Block Images") Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-mtd/1433EB7A-FC89-47D6-8F47-23BE41B263B3@illinois.edu/ Signed-off-by: Richard Weinberger Reviewed-by: Zhihao Cheng Signed-off-by: Sasha Levin --- drivers/mtd/ubi/vtbl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index f700f0e4f2ec4..6e5489e233dd2 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -791,6 +791,12 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) * The number of supported volumes is limited by the eraseblock size * and by the UBI_MAX_VOLUMES constant. */ + + if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) { + ubi_err(ubi, "LEB size too small for a volume record"); + return -EINVAL; + } + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; if (ubi->vtbl_slots > UBI_MAX_VOLUMES) ubi->vtbl_slots = UBI_MAX_VOLUMES; -- GitLab From 5d1442eeb93689902b8fea2efe041d3054814c86 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 20 Feb 2024 10:49:03 +0800 Subject: [PATCH 1161/1418] ubi: correct the calculation of fastmap size [ Upstream commit 7f174ae4f39e8475adcc09d26c5a43394689ad6c ] Now that the calculation of fastmap size in ubi_calc_fm_size() is incorrect since it miss each user volume's ubi_fm_eba structure and the Internal UBI volume info. Let's correct the calculation. Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- drivers/mtd/ubi/fastmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index ca2d9efe62c3c..1060e19205d2a 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -85,9 +85,10 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi) sizeof(struct ubi_fm_scan_pool) + sizeof(struct ubi_fm_scan_pool) + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + - (sizeof(struct ubi_fm_eba) + - (ubi->peb_count * sizeof(__be32))) + - sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + ((sizeof(struct ubi_fm_eba) + + sizeof(struct ubi_fm_volhdr)) * + (UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT)) + + (ubi->peb_count * sizeof(__be32)); return roundup(size, ubi->leb_size); } -- GitLab From ffbfea10d9c98c98e630e658407da52f341d3ff0 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sun, 11 Feb 2024 00:45:51 +0300 Subject: [PATCH 1162/1418] mtd: rawnand: meson: fix scrambling mode value in command macro [ Upstream commit ef6f463599e16924cdd02ce5056ab52879dc008c ] Scrambling mode is enabled by value (1 << 19). NFC_CMD_SCRAMBLER_ENABLE is already (1 << 19), so there is no need to shift it again in CMDRWGEN macro. Signed-off-by: Arseniy Krasnov Cc: Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240210214551.441610-1-avkrasnov@salutedevices.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/meson_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 0aeac8ccbd0ee..05925fb694602 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -63,7 +63,7 @@ #define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages) \ ( \ (cmd_dir) | \ - ((ran) << 19) | \ + (ran) | \ ((bch) << 14) | \ ((short_mode) << 13) | \ (((page_size) & 0x7f) << 6) | \ -- GitLab From 10857a2412fc123bb6d868698ace39e6b2ffb9fe Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 16 Feb 2024 14:26:55 +0100 Subject: [PATCH 1163/1418] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd() [ Upstream commit e5db6a74571a8baf87a116ea39aab946283362ff ] Convert to use real temp variables instead of clobbering processor registers. This aligns the 64-bit inline assembly code with the 32-bit assembly code which was rewritten with commit 427c1073a2a1 ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). While at it, fix comment in 32-bit rewrite code. Temporary variables are now used for both 32-bit and 64-bit code, so move their declarations to the function header. No functional change intended. Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/unaligned.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 8a8e7d7224a26..782ee05e20889 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -167,6 +167,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; + unsigned long shift, temp1; __u64 val = 0; ASM_EXCEPTIONTABLE_VAR(ret); @@ -178,25 +179,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) #ifdef CONFIG_64BIT __asm__ __volatile__ ( -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ -" mtsp %4, %%sr1\n" -" depd %%r0,63,3,%3\n" -"1: ldd 0(%%sr1,%3),%0\n" -"2: ldd 8(%%sr1,%3),%%r20\n" -" subi 64,%%r19,%%r19\n" -" mtsar %%r19\n" -" shrpd %0,%%r20,%%sar,%0\n" +" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */ +" mtsp %5, %%sr1\n" +" depd %%r0,63,3,%2\n" +"1: ldd 0(%%sr1,%2),%0\n" +"2: ldd 8(%%sr1,%2),%4\n" +" subi 64,%3,%3\n" +" mtsar %3\n" +" shrpd %0,%4,%%sar,%0\n" "3: \n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") - : "=r" (val), "+r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20" ); + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) + : "r" (regs->isr) ); #else - { - unsigned long shift, temp1; __asm__ __volatile__ ( -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */ " mtsp %5, %%sr1\n" " dep %%r0,31,2,%2\n" "1: ldw 0(%%sr1,%2),%0\n" @@ -212,7 +210,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); - } #endif DPRINTF("val = 0x%llx\n", val); -- GitLab From 6eb684e9c0d0b83ca8b232456258c28ebb6c53f1 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 23 Feb 2024 16:40:51 +0100 Subject: [PATCH 1164/1418] parisc: Avoid clobbering the C/B bits in the PSW with tophys and tovirt macros [ Upstream commit 4603fbaa76b5e703b38ac8cc718102834eb6e330 ] Use add,l to avoid clobbering the C/B bits in the PSW. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Sasha Levin --- arch/parisc/include/asm/assembly.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 5937d5edaba1e..000a28e1c5e8d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -97,26 +97,28 @@ * version takes two arguments: a src and destination register. * However, the source and destination registers can not be * the same register. + * + * We use add,l to avoid clobbering the C/B bits in the PSW. */ .macro tophys grvirt, grphys - ldil L%(__PAGE_OFFSET), \grphys - sub \grvirt, \grphys, \grphys + ldil L%(-__PAGE_OFFSET), \grphys + addl \grvirt, \grphys, \grphys .endm - + .macro tovirt grphys, grvirt ldil L%(__PAGE_OFFSET), \grvirt - add \grphys, \grvirt, \grvirt + addl \grphys, \grvirt, \grvirt .endm .macro tophys_r1 gr - ldil L%(__PAGE_OFFSET), %r1 - sub \gr, %r1, \gr + ldil L%(-__PAGE_OFFSET), %r1 + addl \gr, %r1, \gr .endm - + .macro tovirt_r1 gr ldil L%(__PAGE_OFFSET), %r1 - add \gr, %r1, \gr + addl \gr, %r1, \gr .endm .macro delay value -- GitLab From 3b64d68d90f5ebb90be5423215ea83126fcd46bb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Feb 2024 09:55:26 -0800 Subject: [PATCH 1165/1418] parisc: Fix ip_fast_csum [ Upstream commit a2abae8f0b638c31bb9799d9dd847306e0d005bd ] IP checksum unit tests report the following error when run on hppa/hppa64. # test_ip_fast_csum: ASSERTION FAILED at lib/checksum_kunit.c:463 Expected ( u64)csum_result == ( u64)expected, but ( u64)csum_result == 33754 (0x83da) ( u64)expected == 10946 (0x2ac2) not ok 4 test_ip_fast_csum 0x83da is the expected result if the IP header length is 20 bytes. 0x2ac2 is the expected result if the IP header length is 24 bytes. The test fails with an IP header length of 24 bytes. It appears that ip_fast_csum() always returns the checksum for a 20-byte header, no matter how long the header actually is. Code analysis shows a suspicious assembler sequence in ip_fast_csum(). " addc %0, %3, %0\n" "1: ldws,ma 4(%1), %3\n" " addib,< 0, %2, 1b\n" <--- While my understanding of HPPA assembler is limited, it does not seem to make much sense to subtract 0 from a register and to expect the result to ever be negative. Subtracting 1 from the length parameter makes more sense. On top of that, the operation should be repeated if and only if the result is still > 0, so change the suspicious instruction to " addib,> -1, %2, 1b\n" The IP checksum unit test passes after this change. Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index 3c43baca7b397..f705e5dd10742 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -40,7 +40,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) " addc %0, %5, %0\n" " addc %0, %3, %0\n" "1: ldws,ma 4(%1), %3\n" -" addib,< 0, %2, 1b\n" +" addib,> -1, %2, 1b\n" " addc %0, %3, %0\n" "\n" " extru %0, 31, 16, %4\n" -- GitLab From a5d32783a5665a1c63eb9d2bebb7f9001183e9ed Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Feb 2024 11:15:56 -0800 Subject: [PATCH 1166/1418] parisc: Fix csum_ipv6_magic on 32-bit systems [ Upstream commit 4408ba75e4ba80c91fde7e10bccccf388f5c09be ] Calculating the IPv6 checksum on 32-bit systems missed overflows when adding the proto+len fields into the checksum. This results in the following unit test failure. # test_csum_ipv6_magic: ASSERTION FAILED at lib/checksum_kunit.c:506 Expected ( u64)csum_result == ( u64)expected, but ( u64)csum_result == 46722 (0xb682) ( u64)expected == 46721 (0xb681) not ok 5 test_csum_ipv6_magic This is probably rarely seen in the real world because proto+len are usually small values which will rarely result in overflows when calculating the checksum. However, the unit test code uses large values for the length field, causing the test to fail. Fix the problem by adding the missing carry into the final checksum. Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index f705e5dd10742..e619e67440db9 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -163,7 +163,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " ldw,ma 4(%2), %7\n" /* 4th daddr */ " addc %6, %0, %0\n" " addc %7, %0, %0\n" -" addc %3, %0, %0\n" /* fold in proto+len, catch carry */ +" addc %3, %0, %0\n" /* fold in proto+len */ +" addc 0, %0, %0\n" /* add carry */ #endif : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len), -- GitLab From 053bb9aab73dd2f39ae26e85c35d3e11f4c6ea82 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 13 Feb 2024 15:46:31 -0800 Subject: [PATCH 1167/1418] parisc: Fix csum_ipv6_magic on 64-bit systems [ Upstream commit 4b75b12d70506e31fc02356bbca60f8d5ca012d0 ] hppa 64-bit systems calculates the IPv6 checksum using 64-bit add operations. The last add folds protocol and length fields into the 64-bit result. While unlikely, this operation can overflow. The overflow can be triggered with a code sequence such as the following. /* try to trigger massive overflows */ memset(tmp_buf, 0xff, sizeof(struct in6_addr)); csum_result = csum_ipv6_magic((struct in6_addr *)tmp_buf, (struct in6_addr *)tmp_buf, 0xffff, 0xff, 0xffffffff); Fix the problem by adding any overflows from the final add operation into the calculated checksum. Fortunately, we can do this without additional cost by replacing the add operation used to fold the checksum into 32 bit with "add,dc" to add in the missing carry. Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Reviewed-by: Charlie Jenkins Tested-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index e619e67440db9..c949aa20fa162 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -137,8 +137,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */ " extrd,u %0, 31, 32, %4\n"/* copy upper half down */ " depdi 0, 31, 32, %0\n"/* clear upper half */ -" add %4, %0, %0\n" /* fold into 32-bits */ -" addc 0, %0, %0\n" /* add carry */ +" add,dc %4, %0, %0\n" /* fold into 32-bits, plus carry */ +" addc 0, %0, %0\n" /* add final carry */ #else -- GitLab From ceffd026f851a86a0bf6f1a9a11c514eb7803e4a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 27 Feb 2024 12:33:51 -0800 Subject: [PATCH 1168/1418] parisc: Strip upper 32 bit of sum in csum_ipv6_magic for 64-bit builds [ Upstream commit 0568b6f0d863643db2edcc7be31165740c89fa82 ] IPv6 checksum tests with unaligned addresses on 64-bit builds result in unexpected failures. Expected expected == csum_result, but expected == 46591 (0xb5ff) csum_result == 46381 (0xb52d) with alignment offset 1 Oddly enough, the problem disappeared after adding test code into the beginning of csum_ipv6_magic(). As it turns out, the 'sum' parameter of csum_ipv6_magic() is declared as __wsum, which is a 32-bit variable. However, it is treated as 64-bit variable in the 64-bit assembler code. Tests showed that the upper 32 bit of the register used to pass the variable are _not_ cleared when entering the function. This can result in checksum calculation errors. Clearing the upper 32 bit of 'sum' as first operation in the assembler code fixes the problem. Acked-by: Helge Deller Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index c949aa20fa162..2aceebcd695c8 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -126,6 +126,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, ** Try to keep 4 registers with "live" values ahead of the ALU. */ +" depdi 0, 31, 32, %0\n"/* clear upper half of incoming checksum */ " ldd,ma 8(%1), %4\n" /* get 1st saddr word */ " ldd,ma 8(%2), %5\n" /* get 1st daddr word */ " add %4, %0, %0\n" -- GitLab From 9477cfeb300823461b44223a7d5fac26a31df4fe Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 12 Jan 2024 15:10:17 +0800 Subject: [PATCH 1169/1418] md/raid5: fix atomicity violation in raid5_cache_count [ Upstream commit dfd2bf436709b2bccb78c2dda550dde93700efa7 ] In raid5_cache_count(): if (conf->max_nr_stripes < conf->min_nr_stripes) return 0; return conf->max_nr_stripes - conf->min_nr_stripes; The current check is ineffective, as the values could change immediately after being checked. In raid5_set_cache_size(): ... conf->min_nr_stripes = size; ... while (size > conf->max_nr_stripes) conf->min_nr_stripes = conf->max_nr_stripes; ... Due to intermediate value updates in raid5_set_cache_size(), concurrent execution of raid5_cache_count() and raid5_set_cache_size() may lead to inconsistent reads of conf->max_nr_stripes and conf->min_nr_stripes. The current checks are ineffective as values could change immediately after being checked, raising the risk of conf->min_nr_stripes exceeding conf->max_nr_stripes and potentially causing an integer overflow. This possible bug is found by an experimental static analysis tool developed by our team. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 6.2. To resolve this issue, it is suggested to introduce local variables 'min_stripes' and 'max_stripes' in raid5_cache_count() to ensure the values remain stable throughout the check. Adding locks in raid5_cache_count() fails to resolve atomicity violations, as raid5_set_cache_size() may hold intermediate values of conf->min_nr_stripes while unlocked. With this patch applied, our tool no longer reports the bug, with the kernel configuration allyesconfig for x86_64. Due to the lack of associated hardware, we cannot test the patch in runtime testing, and just verify it according to the code logic. Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240112071017.16313-1-2045gemini@gmail.com Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid5.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e4564ca1f2434..8cf2317857e0a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2420,7 +2420,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) atomic_inc(&conf->active_stripes); raid5_release_stripe(sh); - conf->max_nr_stripes++; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1); return 1; } @@ -2717,7 +2717,7 @@ static int drop_one_stripe(struct r5conf *conf) shrink_buffers(sh); free_stripe(conf->slab_cache, sh); atomic_dec(&conf->active_stripes); - conf->max_nr_stripes--; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1); return 1; } @@ -6891,7 +6891,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) if (size <= 16 || size > 32768) return -EINVAL; - conf->min_nr_stripes = size; + WRITE_ONCE(conf->min_nr_stripes, size); mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) @@ -6903,7 +6903,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) { - conf->min_nr_stripes = conf->max_nr_stripes; + WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes); result = -ENOMEM; break; } @@ -7468,11 +7468,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + int max_stripes = READ_ONCE(conf->max_nr_stripes); + int min_stripes = READ_ONCE(conf->min_nr_stripes); - if (conf->max_nr_stripes < conf->min_nr_stripes) + if (max_stripes < min_stripes) /* unlikely, but not impossible */ return 0; - return conf->max_nr_stripes - conf->min_nr_stripes; + return max_stripes - min_stripes; } static struct r5conf *setup_conf(struct mddev *mddev) -- GitLab From 56452891e262fdbcc07a4d7d66deef7c0e444f0c Mon Sep 17 00:00:00 2001 From: Shivnandan Kumar Date: Tue, 27 Feb 2024 14:43:51 +0530 Subject: [PATCH 1170/1418] cpufreq: Limit resolving a frequency to policy min/max [ Upstream commit d394abcb12bb1a6f309c1221fdb8e73594ecf1b4 ] Resolving a frequency to an efficient one should not transgress policy->max (which can be set for thermal reason) and policy->min. Currently, there is possibility where scaling_cur_freq can exceed scaling_max_freq when scaling_max_freq is an inefficient frequency. Add a check to ensure that resolving a frequency will respect policy->min/max. Cc: All applicable Fixes: 1f39fa0dccff ("cpufreq: Introducing CPUFREQ_RELATION_E") Signed-off-by: Shivnandan Kumar [ rjw: Whitespace adjustment, changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- include/linux/cpufreq.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e53..9d208648c84d5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1023,6 +1023,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1056,7 +1068,8 @@ retry: return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } -- GitLab From a7b6523f92001a538b74edf85f1384cab4fada5a Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Thu, 29 Feb 2024 12:14:59 +0530 Subject: [PATCH 1171/1418] PM: suspend: Set mem_sleep_current during kernel command line setup [ Upstream commit 9bc4ffd32ef8943f5c5a42c9637cfd04771d021b ] psci_init_system_suspend() invokes suspend_set_ops() very early during bootup even before kernel command line for mem_sleep_default is setup. This leads to kernel command line mem_sleep_default=s2idle not working as mem_sleep_current gets changed to deep via suspend_set_ops() and never changes back to s2idle. Set mem_sleep_current along with mem_sleep_default during kernel command line setup as default suspend mode. Fixes: faf7ec4a92c0 ("drivers: firmware: psci: add system suspend support") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Maulik Shah Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/power/suspend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index fa3bf161d13f7..a718067deecee 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -192,6 +192,7 @@ static int __init mem_sleep_default_setup(char *str) if (mem_sleep_labels[state] && !strcmp(str, mem_sleep_labels[state])) { mem_sleep_default = state; + mem_sleep_current = state; break; } -- GitLab From 852db52b45ea96dac2720f108e7c7331cd3738bb Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:47 +0100 Subject: [PATCH 1172/1418] clk: qcom: gcc-ipq6018: fix terminating of frequency table arrays [ Upstream commit cdbc6e2d8108bc47895e5a901cfcaf799b00ca8d ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: d9db07f088af ("clk: qcom: Add ipq6018 Global Clock Controller support") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-2-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq6018.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 4c5c7a8f41d08..b9844e41cf99d 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -1557,6 +1557,7 @@ static struct clk_regmap_div nss_ubi0_div_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(24000000, P_XO, 1, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_core_pi_sleep_clk[] = { @@ -1737,6 +1738,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(160000000, P_GPLL0, 5, 0, 0), F(216000000, P_GPLL6, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_gpll6_gpll0_div2[] = { -- GitLab From dd92b159c506804ac57adf3742d9728298bb1255 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:48 +0100 Subject: [PATCH 1173/1418] clk: qcom: gcc-ipq8074: fix terminating of frequency table arrays [ Upstream commit 1040ef5ed95d6fd2628bad387d78a61633e09429 ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: 9607f6224b39 ("clk: qcom: ipq8074: add PCIE, USB and SDCC clocks") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-3-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq8074.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index b2e83b38976e5..b52c923a2fbca 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -973,6 +973,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + { } }; static struct clk_rcg2 pcie0_aux_clk_src = { @@ -1078,6 +1079,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(160000000, P_GPLL0, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static struct clk_rcg2 sdcc1_ice_core_clk_src = { -- GitLab From 185de0b7cdeaad8b89ebd4c8a258ff2f21adba99 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:51 +0100 Subject: [PATCH 1174/1418] clk: qcom: mmcc-apq8084: fix terminating of frequency table arrays [ Upstream commit a903cfd38d8dee7e754fb89fd1bebed99e28003d ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: 2b46cd23a5a2 ("clk: qcom: Add APQ8084 Multimedia Clock Controller (MMCC) support") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-6-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/mmcc-apq8084.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/mmcc-apq8084.c b/drivers/clk/qcom/mmcc-apq8084.c index e9f9713591558..5f373c10ec6ee 100644 --- a/drivers/clk/qcom/mmcc-apq8084.c +++ b/drivers/clk/qcom/mmcc-apq8084.c @@ -334,6 +334,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(333430000, P_MMPLL1, 3.5, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -358,6 +359,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(228570000, P_MMPLL0, 3.5, 0, 0), F(320000000, P_MMPLL0, 2.5, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { -- GitLab From 537040c257ab4cd0673fbae048f3940c8ea2e589 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:52 +0100 Subject: [PATCH 1175/1418] clk: qcom: mmcc-msm8974: fix terminating of frequency table arrays [ Upstream commit e2c02a85bf53ae86d79b5fccf0a75ac0b78e0c96 ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: d8b212014e69 ("clk: qcom: Add support for MSM8974's multimedia clock controller (MMCC)") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-7-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/mmcc-msm8974.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c index 17ed52046170a..eb2b0e2200d23 100644 --- a/drivers/clk/qcom/mmcc-msm8974.c +++ b/drivers/clk/qcom/mmcc-msm8974.c @@ -279,6 +279,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -303,6 +304,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { -- GitLab From b2c898469dfc388f619c6c972a28466cbb1442ea Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Thu, 29 Feb 2024 16:14:38 +0200 Subject: [PATCH 1176/1418] usb: xhci: Add error handling in xhci_map_urb_for_dma [ Upstream commit be95cc6d71dfd0cba66e3621c65413321b398052 ] Currently xhci_map_urb_for_dma() creates a temporary buffer and copies the SG list to the new linear buffer. But if the kzalloc_node() fails, then the following sg_pcopy_to_buffer() can lead to crash since it tries to memcpy to NULL pointer. So return -ENOMEM if kzalloc returns null pointer. Cc: stable@vger.kernel.org # 5.11 Fixes: 2017a1e58472 ("usb: xhci: Use temporary buffer to consolidate SG") Signed-off-by: Prashanth K Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240229141438.619372-10-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c02ad4f76bb3c..565aba6b99860 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1334,6 +1334,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb) temp = kzalloc_node(buf_len, GFP_ATOMIC, dev_to_node(hcd->self.sysdev)); + if (!temp) + return -ENOMEM; if (usb_urb_dir_out(urb)) sg_pcopy_to_buffer(urb->sg, urb->num_sgs, -- GitLab From 33a4aa08d5f0a816323510b40924f4bd90e28e64 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 29 Feb 2024 23:25:19 +1100 Subject: [PATCH 1177/1418] powerpc/fsl: Fix mfpmr build errors with newer binutils [ Upstream commit 5f491356b7149564ab22323ccce79c8d595bfd0c ] Binutils 2.38 complains about the use of mfpmr when building ppc6xx_defconfig: CC arch/powerpc/kernel/pmc.o {standard input}: Assembler messages: {standard input}:45: Error: unrecognized opcode: `mfpmr' {standard input}:56: Error: unrecognized opcode: `mtpmr' This is because by default the kernel is built with -mcpu=powerpc, and the mt/mfpmr instructions are not defined. It can be avoided by enabling CONFIG_E300C3_CPU, but just adding that to the defconfig will leave open the possibility of randconfig failures. So add machine directives around the mt/mfpmr instructions to tell binutils how to assemble them. Cc: stable@vger.kernel.org Reported-by: Jan-Benedict Glaw Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229122521.762431-3-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/reg_fsl_emb.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index a21f529c43d96..8359c06d92d9f 100644 --- a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -12,9 +12,16 @@ #ifndef __ASSEMBLY__ /* Performance Monitor Registers */ #define mfpmr(rn) ({unsigned int rval; \ - asm volatile("mfpmr %0," __stringify(rn) \ + asm volatile(".machine push; " \ + ".machine e300; " \ + "mfpmr %0," __stringify(rn) ";" \ + ".machine pop; " \ : "=r" (rval)); rval;}) -#define mtpmr(rn, v) asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v)) +#define mtpmr(rn, v) asm volatile(".machine push; " \ + ".machine e300; " \ + "mtpmr " __stringify(rn) ",%0; " \ + ".machine pop; " \ + : : "r" (v)) #endif /* __ASSEMBLY__ */ /* Freescale Book E Performance Monitor APU Registers */ -- GitLab From a87209645494907009ef93f71bcca9b88a24183f Mon Sep 17 00:00:00 2001 From: Daniel Vogelbacher Date: Sun, 11 Feb 2024 15:42:46 +0100 Subject: [PATCH 1178/1418] USB: serial: ftdi_sio: add support for GMC Z216C Adapter IR-USB [ Upstream commit 3fb7bc4f3a98c48981318b87cf553c5f115fd5ca ] The GMC IR-USB adapter cable utilizes a FTDI FT232R chip. Add VID/PID for this adapter so it can be used as serial device via ftdi_sio. Signed-off-by: Daniel Vogelbacher Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index fe2173e37b061..248cbc9c48fd1 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1077,6 +1077,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + /* GMC devices */ + { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 21a2b5a25fc09..5ee60ba2a73cd 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1606,3 +1606,9 @@ #define UBLOX_VID 0x1546 #define UBLOX_C099F9P_ZED_PID 0x0502 #define UBLOX_C099F9P_ODIN_PID 0x0503 + +/* + * GMC devices + */ +#define GMC_VID 0x1cd7 +#define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */ -- GitLab From b67095647f5bf6e4a198a5fd31dfe14b40da6cf4 Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Tue, 13 Feb 2024 21:53:29 +0000 Subject: [PATCH 1179/1418] USB: serial: add device ID for VeriFone adapter [ Upstream commit cda704809797a8a86284f9df3eef5e62ec8a3175 ] Add device ID for a (probably fake) CP2102 UART device. lsusb -v output: Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 1.10 bDeviceClass 0 [unknown] bDeviceSubClass 0 [unknown] bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x11ca VeriFone Inc idProduct 0x0212 Verifone USB to Printer bcdDevice 1.00 iManufacturer 1 Silicon Labs iProduct 2 Verifone USB to Printer iSerial 3 0001 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0020 bNumInterfaces 1 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 100mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 [unknown] bInterfaceProtocol 0 iInterface 2 Verifone USB to Printer Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Device Status: 0x0000 (Bus Powered) Signed-off-by: Cameron Williams Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index b3e60b3847941..bd0632e77d8b0 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -177,6 +177,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x11CA, 0x0212) }, /* Verifone USB to Printer (UART, CP2102) */ { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ -- GitLab From d0ab375e730a9ba32b9cd61a184ce8e749653c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20H=C3=A4ggstr=C3=B6m?= Date: Wed, 14 Feb 2024 11:47:29 +0100 Subject: [PATCH 1180/1418] USB: serial: cp210x: add ID for MGP Instruments PDS100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a0d9d868491a362d421521499d98308c8e3a0398 ] The radiation meter has the text MGP Instruments PDS-100G or PDS-100GN produced by Mirion Technologies. Tested by forcing the driver association with echo 10c4 863c > /sys/bus/usb-serial/drivers/cp210x/new_id and then setting the serial port in 115200 8N1 mode. The device announces ID_USB_VENDOR_ENC=Silicon\x20Labs and ID_USB_MODEL_ENC=PDS100 Signed-off-by: Christian Häggström Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index bd0632e77d8b0..e9ee8da8cc296 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -144,6 +144,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ + { USB_DEVICE(0x10C4, 0x863C) }, /* MGP Instruments PDS100 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ -- GitLab From 8093d6e928bd2a59b1a08ac439cf1a72deffea52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Jacobs?= Date: Wed, 31 Jan 2024 18:49:17 +0100 Subject: [PATCH 1181/1418] USB: serial: option: add MeiG Smart SLM320 product MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 46809c51565b83881aede6cdf3b0d25254966a41 ] Update the USB serial option driver to support MeiG Smart SLM320. ID 2dee:4d41 UNISOC UNISOC-8910 T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2dee ProdID=4d41 Rev=00.00 S: Manufacturer=UNISOC S: Product=UNISOC-8910 C: #Ifs= 8 Cfg#= 1 Atr=e0 MxPwr=400mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Tested successfully a PPP LTE connection using If#= 0. Not sure of the purpose of every other serial interfaces. Signed-off-by: Aurélien Jacobs Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c0a0cca65437f..1a3e5a9414f07 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -613,6 +613,11 @@ static void option_instat_callback(struct urb *urb); /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 +/* MeiG Smart Technology products */ +#define MEIGSMART_VENDOR_ID 0x2dee +/* MeiG Smart SLM320 based on UNISOC UIS8910 */ +#define MEIGSMART_PRODUCT_SLM320 0x4d41 + /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -2282,6 +2287,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- GitLab From ed85c3113ad1c623652ab8b0ea0f4a7f5e988f5e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Feb 2024 11:49:16 +0000 Subject: [PATCH 1182/1418] KVM: x86/xen: inject vCPU upcall vector when local APIC is enabled [ Upstream commit 8e62bf2bfa46367e14d0ffdcde5aada08759497c ] Linux guests since commit b1c3497e604d ("x86/xen: Add support for HVMOP_set_evtchn_upcall_vector") in v6.0 onwards will use the per-vCPU upcall vector when it's advertised in the Xen CPUID leaves. This upcall is injected through the guest's local APIC as an MSI, unlike the older system vector which was merely injected by the hypervisor any time the CPU was able to receive an interrupt and the upcall_pending flags is set in its vcpu_info. Effectively, that makes the per-CPU upcall edge triggered instead of level triggered, which results in the upcall being lost if the MSI is delivered when the local APIC is *disabled*. Xen checks the vcpu_info->evtchn_upcall_pending flag when the local APIC for a vCPU is software enabled (in fact, on any write to the SPIV register which doesn't disable the APIC). Do the same in KVM since KVM doesn't provide a way for userspace to intervene and trap accesses to the SPIV register of a local APIC emulated by KVM. Fixes: fde0451be8fb3 ("KVM: x86/xen: Support per-vCPU event channel upcall via local APIC") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240227115648.3104-3-dwmw2@infradead.org Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/lapic.c | 5 ++++- arch/x86/kvm/xen.c | 2 +- arch/x86/kvm/xen.h | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index edcf45e312b99..bfeafe4855528 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -40,6 +40,7 @@ #include "ioapic.h" #include "trace.h" #include "x86.h" +#include "xen.h" #include "cpuid.h" #include "hyperv.h" @@ -338,8 +339,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) } /* Check if there are APF page ready requests pending */ - if (enabled) + if (enabled) { kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); + kvm_xen_sw_enable_lapic(apic->vcpu); + } } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index a58a426e6b1c0..684a39df60d9e 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -314,7 +314,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); } -static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) { struct kvm_lapic_irq irq = { }; int r; diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 532a535a9e99f..500d9593a5a38 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -16,6 +16,7 @@ extern struct static_key_false_deferred kvm_xen_enabled; int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu); void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu); +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *vcpu); int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); @@ -33,6 +34,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); +static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) +{ + /* + * The local APIC is being enabled. If the per-vCPU upcall vector is + * set and the vCPU's evtchn_upcall_pending flag is set, inject the + * interrupt. + */ + if (static_branch_unlikely(&kvm_xen_enabled.key) && + vcpu->arch.xen.vcpu_info_cache.active && + vcpu->arch.xen.upcall_vector && __kvm_xen_has_interrupt(vcpu)) + kvm_xen_inject_vcpu_vector(vcpu); +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return static_branch_unlikely(&kvm_xen_enabled.key) && @@ -98,6 +112,10 @@ static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) { } +static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) +{ +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return false; -- GitLab From a43ebdbd39ce56992f5939bd78991f353f212f7a Mon Sep 17 00:00:00 2001 From: Toru Katagiri Date: Tue, 5 Mar 2024 08:46:14 +0900 Subject: [PATCH 1183/1418] USB: serial: cp210x: add pid/vid for TDK NC0110013M and MM0110113M [ Upstream commit b1a8da9ff1395c4879b4bd41e55733d944f3d613 ] TDK NC0110013M and MM0110113M have custom USB IDs for CP210x, so we need to add them to the driver. Signed-off-by: Toru Katagiri Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index e9ee8da8cc296..aa30288c8a8e0 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -56,6 +56,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x04BF, 0x1301) }, /* TDK Corporation NC0110013M - Network Controller */ + { USB_DEVICE(0x04BF, 0x1303) }, /* TDK Corporation MM0110113M - i3 Micro Module */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ -- GitLab From 56a2038d00171bd903206256e30eba4c261505a2 Mon Sep 17 00:00:00 2001 From: Qingliang Li Date: Fri, 1 Mar 2024 17:26:57 +0800 Subject: [PATCH 1184/1418] PM: sleep: wakeirq: fix wake irq warning in system suspend [ Upstream commit e7a7681c859643f3f2476b2a28a494877fd89442 ] When driver uses pm_runtime_force_suspend() as the system suspend callback function and registers the wake irq with reverse enable ordering, the wake irq will be re-enabled when entering system suspend, triggering an 'Unbalanced enable for IRQ xxx' warning. In this scenario, the call sequence during system suspend is as follows: suspend_devices_and_enter() -> dpm_suspend_start() -> dpm_run_callback() -> pm_runtime_force_suspend() -> dev_pm_enable_wake_irq_check() -> dev_pm_enable_wake_irq_complete() -> suspend_enter() -> dpm_suspend_noirq() -> device_wakeup_arm_wake_irqs() -> dev_pm_arm_wake_irq() To fix this issue, complete the setting of WAKE_IRQ_DEDICATED_ENABLED flag in dev_pm_enable_wake_irq_complete() to avoid redundant irq enablement. Fixes: 8527beb12087 ("PM: sleep: wakeirq: fix wake irq arming") Reviewed-by: Dhruva Gole Signed-off-by: Qingliang Li Reviewed-by: Johan Hovold Cc: 5.16+ # 5.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/wakeirq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index afd094dec5ca3..ca0c092ba47fb 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -362,8 +362,10 @@ void dev_pm_enable_wake_irq_complete(struct device *dev) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && - wirq->status & WAKE_IRQ_DEDICATED_REVERSE) + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** -- GitLab From bdba49e46905e9d4c308deaee704cb7b8dc004d5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 5 Mar 2024 11:42:56 +0100 Subject: [PATCH 1185/1418] mmc: tmio: avoid concurrent runs of mmc_request_done() [ Upstream commit e8d1b41e69d72c62865bebe8f441163ec00b3d44 ] With the to-be-fixed commit, the reset_work handler cleared 'host->mrq' outside of the spinlock protected critical section. That leaves a small race window during execution of 'tmio_mmc_reset()' where the done_work handler could grab a pointer to the now invalid 'host->mrq'. Both would use it to call mmc_request_done() causing problems (see link below). However, 'host->mrq' cannot simply be cleared earlier inside the critical section. That would allow new mrqs to come in asynchronously while the actual reset of the controller still needs to be done. So, like 'tmio_mmc_set_ios()', an ERR_PTR is used to prevent new mrqs from coming in but still avoiding concurrency between work handlers. Reported-by: Dirk Behme Closes: https://lore.kernel.org/all/20240220061356.3001761-1-dirk.behme@de.bosch.com/ Fixes: df3ef2d3c92c ("mmc: protect the tmio_mmc driver against a theoretical race") Signed-off-by: Wolfram Sang Tested-by: Dirk Behme Reviewed-by: Dirk Behme Cc: stable@vger.kernel.org # 3.0+ Link: https://lore.kernel.org/r/20240305104423.3177-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/tmio_mmc_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 437048bb80273..5024cae411d3a 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work) else mrq->cmd->error = -ETIMEDOUT; + /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */ + host->mrq = ERR_PTR(-EBUSY); host->cmd = NULL; host->data = NULL; -- GitLab From 970e8c49f2cfe68792c408480f3fbda4b837457c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Feb 2024 16:50:49 +0100 Subject: [PATCH 1186/1418] fuse: fix root lookup with nonzero generation [ Upstream commit 68ca1b49e430f6534d0774a94147a823e3b8b26e ] The root inode has a fixed nodeid and generation (1, 0). Prior to the commit 15db16837a35 ("fuse: fix illegal access to inode with reused nodeid") generation number on lookup was ignored. After this commit lookup with the wrong generation number resulted in the inode being unhashed. This is correct for non-root inodes, but replacing the root inode is wrong and results in weird behavior. Fix by reverting to the old behavior if ignoring the generation for the root inode, but issuing a warning in dmesg. Reported-by: Antonio SJ Musumeci Closes: https://lore.kernel.org/all/CAOQ4uxhek5ytdN8Yz2tNEOg5ea4NkBb4nk0FGPjPk_9nz-VG3g@mail.gmail.com/ Fixes: 15db16837a35 ("fuse: fix illegal access to inode with reused nodeid") Cc: # v5.14 Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5e408e7ec4c6b..936a24b646cef 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -399,6 +399,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out_put_forget; if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; + if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) { + pr_warn_once("root generation should be zero\n"); + outarg->generation = 0; + } *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, &outarg->attr, entry_attr_timeout(outarg), -- GitLab From 371f27c2c8fc3f41b4772c51300c954e796d223f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Feb 2024 16:50:49 +0100 Subject: [PATCH 1187/1418] fuse: don't unhash root [ Upstream commit b1fe686a765e6c0d71811d825b5a1585a202b777 ] The root inode is assumed to be always hashed. Do not unhash the root inode even if it is marked BAD. Fixes: 5d069dbe8aaf ("fuse: fix bad inode") Cc: # v5.11 Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/fuse_i.h | 1 - fs/fuse/inode.c | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a9681fecbd91f..253b9b78d6f13 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -923,7 +923,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation, static inline void fuse_make_bad(struct inode *inode) { - remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f81000d968875..367e3b276092f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -445,8 +445,11 @@ retry: } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); - iput(inode); - goto retry; + if (inode != d_inode(sb->s_root)) { + remove_inode_hash(inode); + iput(inode); + goto retry; + } } fi = get_fuse_inode(inode); spin_lock(&fi->lock); -- GitLab From 2496e37ada362d94f709d789666775723e3fe5e0 Mon Sep 17 00:00:00 2001 From: Jameson Thies Date: Tue, 5 Mar 2024 02:58:01 +0000 Subject: [PATCH 1188/1418] usb: typec: ucsi: Clean up UCSI_CABLE_PROP macros [ Upstream commit 4d0a5a9915793377c0fe1a8d78de6bcd92cea963 ] Clean up UCSI_CABLE_PROP macros by fixing a bitmask shifting error for plug type and updating the modal support macro for consistent naming. Fixes: 3cf657f07918 ("usb: typec: ucsi: Remove all bit-fields") Cc: stable@vger.kernel.org Reviewed-by: Benson Leung Reviewed-by: Prashant Malani Reviewed-by: Dmitry Baryshkov Signed-off-by: Jameson Thies Link: https://lore.kernel.org/r/20240305025804.1290919-2-jthies@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/ucsi/ucsi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 60ce9fb6e7450..dbb10cb310d4c 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -220,12 +220,12 @@ struct ucsi_cable_property { #define UCSI_CABLE_PROP_FLAG_VBUS_IN_CABLE BIT(0) #define UCSI_CABLE_PROP_FLAG_ACTIVE_CABLE BIT(1) #define UCSI_CABLE_PROP_FLAG_DIRECTIONALITY BIT(2) -#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) ((_f_) & GENMASK(3, 0)) +#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) (((_f_) & GENMASK(4, 3)) >> 3) #define UCSI_CABLE_PROPERTY_PLUG_TYPE_A 0 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_B 1 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_C 2 #define UCSI_CABLE_PROPERTY_PLUG_OTHER 3 -#define UCSI_CABLE_PROP_MODE_SUPPORT BIT(5) +#define UCSI_CABLE_PROP_FLAG_MODE_SUPPORT BIT(5) u8 latency; } __packed; -- GitLab From b8073c069a843f556469aab45254b099a4d969de Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 4 Mar 2024 13:43:49 -0800 Subject: [PATCH 1189/1418] serial: Lock console when calling into driver before registration [ Upstream commit 801410b26a0e8b8a16f7915b2b55c9528b69ca87 ] During the handoff from earlycon to the real console driver, we have two separate drivers operating on the same device concurrently. In the case of the 8250 driver these concurrent accesses cause problems due to the driver's use of banked registers, controlled by LCR.DLAB. It is possible for the setup(), config_port(), pm() and set_mctrl() callbacks to set DLAB, which can cause the earlycon code that intends to access TX to instead access DLL, leading to missed output and corruption on the serial line due to unintended modifications to the baud rate. In particular, for setup() we have: univ8250_console_setup() -> serial8250_console_setup() -> uart_set_options() -> serial8250_set_termios() -> serial8250_do_set_termios() -> serial8250_do_set_divisor() For config_port() we have: serial8250_config_port() -> autoconfig() For pm() we have: serial8250_pm() -> serial8250_do_pm() -> serial8250_set_sleep() For set_mctrl() we have (for some devices): serial8250_set_mctrl() -> omap8250_set_mctrl() -> __omap8250_set_mctrl() To avoid such problems, let's make it so that the console is locked during pre-registration calls to these callbacks, which will prevent the earlycon driver from running concurrently. Remove the partial solution to this problem in the 8250 driver that locked the console only during autoconfig_irq(), as this would result in a deadlock with the new approach. The console continues to be locked during autoconfig_irq() because it can only be called through uart_configure_port(). Although this patch introduces more locking than strictly necessary (and in particular it also locks during the call to rs485_config() which is not affected by this issue as far as I can tell), it follows the principle that it is the responsibility of the generic console code to manage the earlycon handoff by ensuring that earlycon and real console driver code cannot run concurrently, and not the individual drivers. Signed-off-by: Peter Collingbourne Reviewed-by: John Ogness Link: https://linux-review.googlesource.com/id/I7cf8124dcebf8618e6b2ee543fa5b25532de55d8 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240304214350.501253-1-pcc@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_port.c | 6 ------ drivers/tty/serial/serial_core.c | 12 ++++++++++++ kernel/printk/printk.c | 21 ++++++++++++++++++--- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8efe31448df3c..c744feabd7cdd 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1377,9 +1377,6 @@ static void autoconfig_irq(struct uart_8250_port *up) inb_p(ICP); } - if (uart_console(port)) - console_lock(); - /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); save_mcr = serial8250_in_MCR(up); @@ -1410,9 +1407,6 @@ static void autoconfig_irq(struct uart_8250_port *up) if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); - if (uart_console(port)) - console_unlock(); - port->irq = (irq > 0) ? irq : 0; } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f0ed30d0a697c..fe3f1d655dfe2 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2561,7 +2561,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->type = PORT_UNKNOWN; flags |= UART_CONFIG_TYPE; } + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); port->ops->config_port(port, flags); + if (uart_console(port)) + console_unlock(); } if (port->type != PORT_UNKNOWN) { @@ -2569,6 +2574,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_report_port(drv, port); + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); + /* Power up port for set_mctrl() */ uart_change_pm(state, UART_PM_STATE_ON); @@ -2585,6 +2594,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_rs485_config(port); + if (uart_console(port)) + console_unlock(); + /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 981cdb00b8722..c55ee859dbd08 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3045,6 +3045,21 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +static int console_call_setup(struct console *newcon, char *options) +{ + int err; + + if (!newcon->setup) + return 0; + + /* Synchronize with possible boot console. */ + console_lock(); + err = newcon->setup(newcon, options); + console_unlock(); + + return err; +} + /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected @@ -3080,8 +3095,8 @@ static int try_enable_preferred_console(struct console *newcon, if (_braille_register_console(newcon, c)) return 0; - if (newcon->setup && - (err = newcon->setup(newcon, c->options)) != 0) + err = console_call_setup(newcon, c->options); + if (err) return err; } newcon->flags |= CON_ENABLED; @@ -3107,7 +3122,7 @@ static void try_enable_default_console(struct console *newcon) if (newcon->index < 0) newcon->index = 0; - if (newcon->setup && newcon->setup(newcon, NULL) != 0) + if (console_call_setup(newcon, NULL) != 0) return; newcon->flags |= CON_ENABLED; -- GitLab From c13e7256571fde61836ec133b3c705a7435f47df Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 23 Feb 2024 18:13:38 +1030 Subject: [PATCH 1190/1418] btrfs: qgroup: always free reserved space for extent records [ Upstream commit d139ded8b9cdb897bb9539eb33311daf9a177fd2 ] [BUG] If qgroup is marked inconsistent (e.g. caused by operations needing full subtree rescan, like creating a snapshot and assign to a higher level qgroup), btrfs would immediately start leaking its data reserved space. The following script can easily reproduce it: mkfs.btrfs -O quota -f $dev mount $dev $mnt btrfs subvolume create $mnt/subv1 btrfs qgroup create 1/0 $mnt # This snapshot creation would mark qgroup inconsistent, # as the ownership involves different higher level qgroup, thus # we have to rescan both source and snapshot, which can be very # time consuming, thus here btrfs just choose to mark qgroup # inconsistent, and let users to determine when to do the rescan. btrfs subv snapshot -i 1/0 $mnt/subv1 $mnt/snap1 # Now this write would lead to qgroup rsv leak. xfs_io -f -c "pwrite 0 64k" $mnt/file1 # And at unmount time, btrfs would report 64K DATA rsv space leaked. umount $mnt And we would have the following dmesg output for the unmount: BTRFS info (device dm-1): last unmount of filesystem 14a3d84e-f47b-4f72-b053-a8a36eef74d3 BTRFS warning (device dm-1): qgroup 0/5 has unreleased space, type 0 rsv 65536 [CAUSE] Since commit e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting"), we introduce a mode for btrfs qgroup to skip the timing consuming backref walk, if the qgroup is already inconsistent. But this skip also covered the data reserved freeing, thus the qgroup reserved space for each newly created data extent would not be freed, thus cause the leakage. [FIX] Make the data extent reserved space freeing mandatory. The qgroup reserved space handling is way cheaper compared to the backref walking part, and we always have the super sensitive leak detector, thus it's definitely worth to always free the qgroup reserved data space. Reported-by: Fabian Vogt Fixes: e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting") CC: stable@vger.kernel.org # 6.1+ Link: https://bugzilla.suse.com/show_bug.cgi?id=1216196 Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b3472bf6b288f..c14d4f70e84bd 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2800,11 +2800,6 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) goto cleanup; } - /* Free the reserved data space */ - btrfs_qgroup_free_refroot(fs_info, - record->data_rsv_refroot, - record->data_rsv, - BTRFS_QGROUP_RSV_DATA); /* * Use BTRFS_SEQ_LAST as time_seq to do special search, * which doesn't lock tree or delayed_refs and search @@ -2826,6 +2821,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) record->old_roots = NULL; new_roots = NULL; } + /* Free the reserved data space */ + btrfs_qgroup_free_refroot(fs_info, + record->data_rsv_refroot, + record->data_rsv, + BTRFS_QGROUP_RSV_DATA); cleanup: ulist_free(record->old_roots); ulist_free(new_roots); -- GitLab From 50361c2af7561c6ce25795bddc710fddfc948a2d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 29 Feb 2024 10:37:04 +0000 Subject: [PATCH 1191/1418] btrfs: fix off-by-one chunk length calculation at contains_pending_extent() [ Upstream commit ae6bd7f9b46a29af52ebfac25d395757e2031d0d ] At contains_pending_extent() the value of the end offset of a chunk we found in the device's allocation state io tree is inclusive, so when we calculate the length we pass to the in_range() macro, we must sum 1 to the expression "physical_end - physical_offset". In practice the wrong calculation should be harmless as chunks sizes are never 1 byte and we should never have 1 byte ranges of unallocated space. Nevertheless fix the wrong calculation. Reported-by: Alex Lyakas Link: https://lore.kernel.org/linux-btrfs/CAOcd+r30e-f4R-5x-S7sV22RJPe7+pgwherA6xqN2_qe7o4XTg@mail.gmail.com/ Fixes: 1c11b63eff2a ("btrfs: replace pending/pinned chunks lists with io tree") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fc2d99270c18..03cfb425ea4ea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1444,7 +1444,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, if (in_range(physical_start, *start, len) || in_range(*start, physical_start, - physical_end - physical_start)) { + physical_end + 1 - physical_start)) { *start = physical_end + 1; return true; } -- GitLab From 900b81caf00c89417172afe0e7e49ac4eb110f4b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Mar 2024 11:45:38 +0100 Subject: [PATCH 1192/1418] PCI/PM: Drain runtime-idle callbacks before driver removal [ Upstream commit 9d5286d4e7f68beab450deddbb6a32edd5ecf4bf ] A race condition between the .runtime_idle() callback and the .remove() callback in the rtsx_pcr PCI driver leads to a kernel crash due to an unhandled page fault [1]. The problem is that rtsx_pci_runtime_idle() is not expected to be running after pm_runtime_get_sync() has been called, but the latter doesn't really guarantee that. It only guarantees that the suspend and resume callbacks will not be running when it returns. However, if a .runtime_idle() callback is already running when pm_runtime_get_sync() is called, the latter will notice that the runtime PM status of the device is RPM_ACTIVE and it will return right away without waiting for the former to complete. In fact, it cannot wait for .runtime_idle() to complete because it may be called from that callback (it arguably does not make much sense to do that, but it is not strictly prohibited). Thus in general, whoever is providing a .runtime_idle() callback needs to protect it from running in parallel with whatever code runs after pm_runtime_get_sync(). [Note that .runtime_idle() will not start after pm_runtime_get_sync() has returned, but it may continue running then if it has started earlier.] One way to address that race condition is to call pm_runtime_barrier() after pm_runtime_get_sync() (not before it, because a nonzero value of the runtime PM usage counter is necessary to prevent runtime PM callbacks from being invoked) to wait for the .runtime_idle() callback to complete should it be running at that point. A suitable place for doing that is in pci_device_remove() which calls pm_runtime_get_sync() before removing the driver, so it may as well call pm_runtime_barrier() subsequently, which will prevent the race in question from occurring, not just in the rtsx_pcr driver, but in any PCI drivers providing .runtime_idle() callbacks. Link: https://lore.kernel.org/lkml/20240229062201.49500-1-kai.heng.feng@canonical.com/ # [1] Link: https://lore.kernel.org/r/5761426.DvuYhMxLoT@kreacher Reported-by: Kai-Heng Feng Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Tested-by: Ricky Wu Acked-by: Kai-Heng Feng Cc: Signed-off-by: Sasha Levin --- drivers/pci/pci-driver.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f47a3b10bf504..8dda3b205dfd0 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -473,6 +473,13 @@ static void pci_device_remove(struct device *dev) if (drv->remove) { pm_runtime_get_sync(dev); + /* + * If the driver provides a .runtime_idle() callback and it has + * started to run already, it may continue to run in parallel + * with the code below, so wait until all of the runtime PM + * activity has completed. + */ + pm_runtime_barrier(dev); drv->remove(pci_dev); pm_runtime_put_noidle(dev); } -- GitLab From c9ef367b3e3984721d59252725fd47b4199aaa6c Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 5 Mar 2024 12:30:56 +0100 Subject: [PATCH 1193/1418] PCI/DPC: Quirk PIO log size for Intel Raptor Lake Root Ports [ Upstream commit 627c6db20703b5d18d928464f411d0d4ec327508 ] Commit 5459c0b70467 ("PCI/DPC: Quirk PIO log size for certain Intel Root Ports") and commit 3b8803494a06 ("PCI/DPC: Quirk PIO log size for Intel Ice Lake Root Ports") add quirks for Ice, Tiger and Alder Lake Root Ports. System firmware for Raptor Lake still has the bug, so Linux logs the warning below on several Raptor Lake systems like Dell Precision 3581 with Intel Raptor Lake processor (0W18NX) system firmware/BIOS version 1.10.1. pci 0000:00:07.0: [8086:a76e] type 01 class 0x060400 pci 0000:00:07.0: DPC: RP PIO log size 0 is invalid pci 0000:00:07.1: [8086:a73f] type 01 class 0x060400 pci 0000:00:07.1: DPC: RP PIO log size 0 is invalid Apply the quirk for Raptor Lake Root Ports as well. This also enables the DPC driver to dump the RP PIO Log registers when DPC is triggered. Link: https://lore.kernel.org/r/20240305113057.56468-1-pmenzel@molgen.mpg.de Reported-by: Niels van Aert Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218560 Signed-off-by: Paul Menzel Signed-off-by: Bjorn Helgaas Cc: Cc: Mika Westerberg Cc: Niels van Aert Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c175b70a984c6..289ba6902e41b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6078,6 +6078,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size); #endif /* -- GitLab From aab8a0745f872d8bdfe7552fd721af17e42bda57 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:06 +0800 Subject: [PATCH 1194/1418] dm-raid: fix lockdep waring in "pers->hot_add_disk" [ Upstream commit 95009ae904b1e9dca8db6f649f2d7c18a6e42c75 ] The lockdep assert is added by commit a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") in print_conf(). And I didn't notice that dm-raid is calling "pers->hot_add_disk" without holding 'reconfig_mutex'. "pers->hot_add_disk" read and write many fields that is protected by 'reconfig_mutex', and raid_resume() already grab the lock in other contex. Hence fix this problem by protecting "pers->host_add_disk" with the lock. Fixes: 9092c02d9435 ("DM RAID: Add ability to restore transiently failed devices on resume") Fixes: a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-10-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index bf833ca880bc1..99b4738e867a8 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -4046,7 +4046,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { -- GitLab From f299404fd869ccb0c95572d8ecebfb11ad532e5a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 27 Jan 2024 11:07:43 -0700 Subject: [PATCH 1195/1418] powerpc: xor_vmx: Add '-mhard-float' to CFLAGS [ Upstream commit 35f20786c481d5ced9283ff42de5c69b65e5ed13 ] arch/powerpc/lib/xor_vmx.o is built with '-msoft-float' (from the main powerpc Makefile) and '-maltivec' (from its CFLAGS), which causes an error when building with clang after a recent change in main: error: option '-msoft-float' cannot be specified with '-maltivec' make[6]: *** [scripts/Makefile.build:243: arch/powerpc/lib/xor_vmx.o] Error 1 Explicitly add '-mhard-float' before '-maltivec' in xor_vmx.o's CFLAGS to override the previous inclusion of '-msoft-float' (as the last option wins), which matches how other areas of the kernel use '-maltivec', such as AMDGPU. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1986 Link: https://github.com/llvm/llvm-project/commit/4792f912b232141ecba4cbae538873be3c28556c Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://msgid.link/20240127-ppc-xor_vmx-drop-msoft-float-v1-1-f24140e81376@kernel.org Signed-off-by: Sasha Levin --- arch/powerpc/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9b394bab17eba..374b82cf13d9d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -72,7 +72,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) # Enable CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) -- GitLab From dcd51ab42b7a0431575689c5f74b8b6efd45fc2f Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 28 Feb 2024 19:38:39 +0300 Subject: [PATCH 1196/1418] mac802154: fix llsec key resources release in mac802154_llsec_key_del [ Upstream commit e8a1e58345cf40b7b272e08ac7b32328b2543e40 ] mac802154_llsec_key_del() can free resources of a key directly without following the RCU rules for waiting before the end of a grace period. This may lead to use-after-free in case llsec_lookup_key() is traversing the list of keys in parallel with a key deletion: refcount_t: addition on 0; use-after-free. WARNING: CPU: 4 PID: 16000 at lib/refcount.c:25 refcount_warn_saturate+0x162/0x2a0 Modules linked in: CPU: 4 PID: 16000 Comm: wpan-ping Not tainted 6.7.0 #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:refcount_warn_saturate+0x162/0x2a0 Call Trace: llsec_lookup_key.isra.0+0x890/0x9e0 mac802154_llsec_encrypt+0x30c/0x9c0 ieee802154_subif_start_xmit+0x24/0x1e0 dev_hard_start_xmit+0x13e/0x690 sch_direct_xmit+0x2ae/0xbc0 __dev_queue_xmit+0x11dd/0x3c20 dgram_sendmsg+0x90b/0xd60 __sys_sendto+0x466/0x4c0 __x64_sys_sendto+0xe0/0x1c0 do_syscall_64+0x45/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Also, ieee802154_llsec_key_entry structures are not freed by mac802154_llsec_key_del(): unreferenced object 0xffff8880613b6980 (size 64): comm "iwpan", pid 2176, jiffies 4294761134 (age 60.475s) hex dump (first 32 bytes): 78 0d 8f 18 80 88 ff ff 22 01 00 00 00 00 ad de x......."....... 00 00 00 00 00 00 00 00 03 00 cd ab 00 00 00 00 ................ backtrace: [] __kmem_cache_alloc_node+0x1e2/0x2d0 [] kmalloc_trace+0x25/0xc0 [] mac802154_llsec_key_add+0xac9/0xcf0 [] ieee802154_add_llsec_key+0x5a/0x80 [] nl802154_add_llsec_key+0x426/0x5b0 [] genl_family_rcv_msg_doit+0x1fe/0x2f0 [] genl_rcv_msg+0x531/0x7d0 [] netlink_rcv_skb+0x169/0x440 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x53c/0x820 [] netlink_sendmsg+0x93b/0xe60 [] ____sys_sendmsg+0xac5/0xca0 [] ___sys_sendmsg+0x11d/0x1c0 [] __sys_sendmsg+0xfa/0x1d0 [] do_syscall_64+0x45/0xf0 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 Handle the proper resource release in the RCU callback function mac802154_llsec_key_del_rcu(). Note that if llsec_lookup_key() finds a key, it gets a refcount via llsec_key_get() and locally copies key id from key_entry (which is a list element). So it's safe to call llsec_key_put() and free the list entry after the RCU grace period elapses. Found by Linux Verification Center (linuxtesting.org). Fixes: 5d637d5aabd8 ("mac802154: add llsec structures and mutators") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Alexander Aring Message-ID: <20240228163840.6667-1-pchelkin@ispras.ru> Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- include/net/cfg802154.h | 1 + net/mac802154/llsec.c | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index d8d8719315fd8..5f7f28c9edcb6 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -267,6 +267,7 @@ struct ieee802154_llsec_key { struct ieee802154_llsec_key_entry { struct list_head list; + struct rcu_head rcu; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 55550ead2ced8..a4cc9d077c59c 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -265,19 +265,27 @@ fail: return -ENOMEM; } +static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu) +{ + struct ieee802154_llsec_key_entry *pos; + struct mac802154_llsec_key *mkey; + + pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu); + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + llsec_key_put(mkey); + kfree_sensitive(pos); +} + int mac802154_llsec_key_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_key_entry *pos; list_for_each_entry(pos, &sec->table.keys, list) { - struct mac802154_llsec_key *mkey; - - mkey = container_of(pos->key, struct mac802154_llsec_key, key); - if (llsec_key_id_equal(&pos->id, key)) { list_del_rcu(&pos->list); - llsec_key_put(mkey); + call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu); return 0; } } -- GitLab From 509546fc034747b91edb75697e8f6d2089bdd270 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 29 May 2023 14:13:55 +0800 Subject: [PATCH 1197/1418] swap: comments get_swap_device() with usage rule [ Upstream commit a95722a047724ef75567381976a36f0e44230bd9 ] The general rule to use a swap entry is as follows. When we get a swap entry, if there aren't some other ways to prevent swapoff, such as the folio in swap cache is locked, page table lock is held, etc., the swap entry may become invalid because of swapoff. Then, we need to enclose all swap related functions with get_swap_device() and put_swap_device(), unless the swap functions call get/put_swap_device() by themselves. Add the rule as comments of get_swap_device(). Link: https://lkml.kernel.org/r/20230529061355.125791-6-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: David Hildenbrand Reviewed-by: Yosry Ahmed Reviewed-by: Chris Li (Google) Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Michal Hocko Cc: Minchan Kim Cc: Tim Chen Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton Stable-dep-of: 82b1c07a0af6 ("mm: swap: fix race between free_swap_and_cache() and swapoff()") Signed-off-by: Sasha Levin --- mm/swapfile.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index cca9fda9d036f..324844f98d67c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1222,6 +1222,13 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } /* + * When we get a swap entry, if there aren't some other ways to + * prevent swapoff, such as the folio in swap cache is locked, page + * table lock is held, etc., the swap entry may become invalid because + * of swapoff. Then, we need to enclose all swap related functions + * with get_swap_device() and put_swap_device(), unless the swap + * functions call get/put_swap_device() by themselves. + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1230,9 +1237,8 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way - * to prevent swapoff, such as page lock, page table lock, etc. The - * caller must be prepared for that. For example, the following - * situation is possible. + * to prevent swapoff. The caller must be prepared for that. For + * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() -- GitLab From 1ede7f1d7eed1738d1b9333fd1e152ccb450b86a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 6 Mar 2024 14:03:56 +0000 Subject: [PATCH 1198/1418] mm: swap: fix race between free_swap_and_cache() and swapoff() [ Upstream commit 82b1c07a0af603e3c47b906c8e991dc96f01688e ] There was previously a theoretical window where swapoff() could run and teardown a swap_info_struct while a call to free_swap_and_cache() was running in another thread. This could cause, amongst other bad possibilities, swap_page_trans_huge_swapped() (called by free_swap_and_cache()) to access the freed memory for swap_map. This is a theoretical problem and I haven't been able to provoke it from a test case. But there has been agreement based on code review that this is possible (see link below). Fix it by using get_swap_device()/put_swap_device(), which will stall swapoff(). There was an extra check in _swap_info_get() to confirm that the swap entry was not free. This isn't present in get_swap_device() because it doesn't make sense in general due to the race between getting the reference and swapoff. So I've added an equivalent check directly in free_swap_and_cache(). Details of how to provoke one possible issue (thanks to David Hildenbrand for deriving this): --8<----- __swap_entry_free() might be the last user and result in "count == SWAP_HAS_CACHE". swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0. So the question is: could someone reclaim the folio and turn si->inuse_pages==0, before we completed swap_page_trans_huge_swapped(). Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are still references by swap entries. Process 1 still references subpage 0 via swap entry. Process 2 still references subpage 1 via swap entry. Process 1 quits. Calls free_swap_and_cache(). -> count == SWAP_HAS_CACHE [then, preempted in the hypervisor etc.] Process 2 quits. Calls free_swap_and_cache(). -> count == SWAP_HAS_CACHE Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls __try_to_reclaim_swap(). __try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()-> put_swap_folio()->free_swap_slot()->swapcache_free_entries()-> swap_entry_free()->swap_range_free()-> ... WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries); What stops swapoff to succeed after process 2 reclaimed the swap cache but before process1 finished its call to swap_page_trans_huge_swapped()? --8<----- Link: https://lkml.kernel.org/r/20240306140356.3974886-1-ryan.roberts@arm.com Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch") Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redhat.com/ Signed-off-by: Ryan Roberts Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/swapfile.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 324844f98d67c..0d6182db44a6a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1229,6 +1229,11 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * with get_swap_device() and put_swap_device(), unless the swap * functions call get/put_swap_device() by themselves. * + * Note that when only holding the PTL, swapoff might succeed immediately + * after freeing a swap entry. Therefore, immediately after + * __swap_entry_free(), the swap info might become stale and should not + * be touched without a prior get_swap_device(). + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1630,13 +1635,19 @@ int free_swap_and_cache(swp_entry_t entry) if (non_swap_entry(entry)) return 1; - p = _swap_info_get(entry); + p = get_swap_device(entry); if (p) { + if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) { + put_swap_device(p); + return 0; + } + count = __swap_entry_free(p, entry); if (count == SWAP_HAS_CACHE && !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); + put_swap_device(p); } return p != NULL; } -- GitLab From ad8a4eb663521323f1a57a72c4a63ffabe24e1ac Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 6 Mar 2024 10:44:38 +0900 Subject: [PATCH 1199/1418] mmc: core: Fix switch on gp3 partition [ Upstream commit 4af59a8df5ea930038cd3355e822f5eedf4accc1 ] Commit e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB partitions.") added a mask check for 'part_type', but the mask used was wrong leading to the code intended for rpmb also being executed for GP3. On some MMCs (but not all) this would make gp3 partition inaccessible: armadillo:~# head -c 1 < /dev/mmcblk2gp3 head: standard input: I/O error armadillo:~# dmesg -c [ 422.976583] mmc2: running CQE recovery [ 423.058182] mmc2: running CQE recovery [ 423.137607] mmc2: running CQE recovery [ 423.137802] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 4 prio class 0 [ 423.237125] mmc2: running CQE recovery [ 423.318206] mmc2: running CQE recovery [ 423.397680] mmc2: running CQE recovery [ 423.397837] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0 [ 423.408287] Buffer I/O error on dev mmcblk2gp3, logical block 0, async page read the part_type values of interest here are defined as follow: main 0 boot0 1 boot1 2 rpmb 3 gp0 4 gp1 5 gp2 6 gp3 7 so mask with EXT_CSD_PART_CONFIG_ACC_MASK (7) to correctly identify rpmb Fixes: e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB partitions.") Cc: stable@vger.kernel.org Cc: Jorge Ramirez-Ortiz Signed-off-by: Dominique Martinet Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240306-mmc-partswitch-v1-1-bf116985d950@codewreck.org Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/block.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ea60efaecb0dd..4688a658d6a6d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -889,10 +889,11 @@ static const struct block_device_operations mmc_bdops = { static int mmc_blk_part_switch_pre(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { if (card->ext_csd.cmdq_en) { ret = mmc_cmdq_disable(card); if (ret) @@ -907,10 +908,11 @@ static int mmc_blk_part_switch_pre(struct mmc_card *card, static int mmc_blk_part_switch_post(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { mmc_retune_unpause(card->host); if (card->reenable_cmdq && !card->ext_csd.cmdq_en) ret = mmc_cmdq_enable(card); -- GitLab From 03f58a64794aaee56df2de37e53c3610dd151f76 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Fri, 1 Mar 2024 14:28:11 +0100 Subject: [PATCH 1200/1418] drm/etnaviv: Restore some id values [ Upstream commit b735ee173f84d5d0d0733c53946a83c12d770d05 ] The hwdb selection logic as a feature that allows it to mark some fields as 'don't care'. If we match with such a field we memcpy(..) the current etnaviv_chip_identity into ident. This step can overwrite some id values read from the GPU with the 'don't care' value. Fix this issue by restoring the affected values after the memcpy(..). As this is crucial for user space to know when this feature works as expected increment the minor version too. Fixes: 4078a1186dd3 ("drm/etnaviv: update hwdb selection logic") Cc: stable@vger.kernel.org Signed-off-by: Christian Gmeiner Reviewed-by: Tomeu Vizoso Signed-off-by: Lucas Stach Signed-off-by: Sasha Levin --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 1d2b4fb4bcf8b..f29952a55c05d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -488,7 +488,7 @@ static const struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 3, + .minor = 4, }; /* diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index f2fc645c79569..212e7050c4ba6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -135,6 +135,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) { struct etnaviv_chip_identity *ident = &gpu->identity; + const u32 product_id = ident->product_id; + const u32 customer_id = ident->customer_id; + const u32 eco_id = ident->eco_id; int i; for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) { @@ -148,6 +151,12 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) etnaviv_chip_identities[i].eco_id == ~0U)) { memcpy(ident, &etnaviv_chip_identities[i], sizeof(*ident)); + + /* Restore some id values as ~0U aka 'don't care' might been used. */ + ident->product_id = product_id; + ident->customer_id = customer_id; + ident->eco_id = eco_id; + return true; } } -- GitLab From acda20add47677df58b779550d2b63a89133b7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 27 Feb 2024 12:05:50 +0100 Subject: [PATCH 1201/1418] landlock: Warn once if a Landlock action is requested while disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 782191c74875cc33b50263e21d76080b1411884d ] Because sandboxing can be used as an opportunistic security measure, user space may not log unsupported features. Let the system administrator know if an application tries to use Landlock but failed because it isn't enabled at boot time. This may be caused by boot loader configurations with outdated "lsm" kernel's command-line parameter. Cc: stable@vger.kernel.org Fixes: 265885daf3e5 ("landlock: Add syscall implementations") Reviewed-by: Kees Cook Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20240227110550.3702236-2-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Sasha Levin --- security/landlock/syscalls.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 2ca0ccbd905ae..d0cb3d0cbf985 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -32,6 +32,18 @@ #include "ruleset.h" #include "setup.h" +static bool is_initialized(void) +{ + if (likely(landlock_initialized)) + return true; + + pr_warn_once( + "Disabled but requested by user space. " + "You should enable Landlock at boot time: " + "https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n"); + return false; +} + /** * copy_min_struct_from_user - Safe future-proof argument copying * @@ -165,7 +177,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, /* Build-time checks. */ build_check_abi(); - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; if (flags) { @@ -311,7 +323,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, struct landlock_ruleset *ruleset; int res, err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* No flag for now. */ @@ -402,7 +414,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, struct landlock_cred_security *new_llcred; int err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* -- GitLab From b04abf51d72505f6a102d53788200e8a38afc29d Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Thu, 7 Mar 2024 12:06:58 +0100 Subject: [PATCH 1202/1418] hwmon: (amc6821) add of_match table [ Upstream commit 3f003fda98a7a8d5f399057d92e6ed56b468657c ] Add of_match table for "ti,amc6821" compatible string. This fixes automatic driver loading by userspace when using device-tree, and if built as a module like major linux distributions do. While devices probe just fine with i2c_device_id table, userspace can't match the "ti,amc6821" compatible string from dt with the plain "amc6821" device id. As a result, the kernel module can not be loaded. Cc: stable@vger.kernel.org Signed-off-by: Josua Mayer Link: https://lore.kernel.org/r/20240307-amc6821-of-match-v1-1-5f40464a3110@solid-run.com [groeck: Cleaned up patch description] Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/amc6821.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 3bfd12ff4b3ca..6868db4ac84f3 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -934,10 +934,21 @@ static const struct i2c_device_id amc6821_id[] = { MODULE_DEVICE_TABLE(i2c, amc6821_id); +static const struct of_device_id __maybe_unused amc6821_of_match[] = { + { + .compatible = "ti,amc6821", + .data = (void *)amc6821, + }, + { } +}; + +MODULE_DEVICE_TABLE(of, amc6821_of_match); + static struct i2c_driver amc6821_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "amc6821", + .of_match_table = of_match_ptr(amc6821_of_match), }, .probe_new = amc6821_probe, .id_table = amc6821_id, -- GitLab From fb1088d51bbaa0faec5a55d4f5818a9ab79e24df Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Thu, 15 Feb 2024 15:50:09 +0000 Subject: [PATCH 1203/1418] ext4: fix corruption during on-line resize [ Upstream commit a6b3bfe176e8a5b05ec4447404e412c2a3fc92cc ] We observed a corruption during on-line resize of a file system that is larger than 16 TiB with 4k block size. With having more then 2^32 blocks resize_inode is turned off by default by mke2fs. The issue can be reproduced on a smaller file system for convenience by explicitly turning off resize_inode. An on-line resize across an 8 GiB boundary (the size of a meta block group in this setup) then leads to a corruption: dev=/dev/ # should be >= 16 GiB mkdir -p /corruption /sbin/mke2fs -t ext4 -b 4096 -O ^resize_inode $dev $((2 * 2**21 - 2**15)) mount -t ext4 $dev /corruption dd if=/dev/zero bs=4096 of=/corruption/test count=$((2*2**21 - 4*2**15)) sha1sum /corruption/test # 79d2658b39dcfd77274e435b0934028adafaab11 /corruption/test /sbin/resize2fs $dev $((2*2**21)) # drop page cache to force reload the block from disk echo 1 > /proc/sys/vm/drop_caches sha1sum /corruption/test # 3c2abc63cbf1a94c9e6977e0fbd72cd832c4d5c3 /corruption/test 2^21 = 2^15*2^6 equals 8 GiB whereof 2^15 is the number of blocks per block group and 2^6 are the number of block groups that make a meta block group. The last checksum might be different depending on how the file is laid out across the physical blocks. The actual corruption occurs at physical block 63*2^15 = 2064384 which would be the location of the backup of the meta block group's block descriptor. During the on-line resize the file system will be converted to meta_bg starting at s_first_meta_bg which is 2 in the example - meaning all block groups after 16 GiB. However, in ext4_flex_group_add we might add block groups that are not part of the first meta block group yet. In the reproducer we achieved this by substracting the size of a whole block group from the point where the meta block group would start. This must be considered when updating the backup block group descriptors to follow the non-meta_bg layout. The fix is to add a test whether the group to add is already part of the meta block group or not. Fixes: 01f795f9e0d67 ("ext4: add online resizing support for meta_bg and 64-bit file systems") Cc: Signed-off-by: Maximilian Heyne Tested-by: Srivathsa Dara Reviewed-by: Srivathsa Dara Link: https://lore.kernel.org/r/20240215155009.94493-1-mheyne@amazon.de Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f2ed15af703a8..38ce42396758d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1606,7 +1606,8 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = ext4_has_feature_meta_bg(sb); + int meta_bg = ext4_has_feature_meta_bg(sb) && + gdb_num >= le32_to_cpu(es->s_first_meta_bg); sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - ext4_group_first_block_no(sb, 0); sector_t old_gdb = 0; -- GitLab From 08eceec7931eb03956fec7910bc798c2cbc4586b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Sat, 24 Feb 2024 11:40:23 +0000 Subject: [PATCH 1204/1418] nvmem: meson-efuse: fix function pointer type mismatch [ Upstream commit cbd38332c140829ab752ba4e727f98be5c257f18 ] clang-16 warns about casting functions to incompatible types, as is done here to call clk_disable_unprepare: drivers/nvmem/meson-efuse.c:78:12: error: cast from 'void (*)(struct clk *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 78 | (void(*)(void *))clk_disable_unprepare, The pattern of getting, enabling and setting a disable callback for a clock can be replaced with devm_clk_get_enabled(), which also fixes this warning. Fixes: 611fbca1c861 ("nvmem: meson-efuse: add peripheral clock") Cc: Stable@vger.kernel.org Reported-by: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Martin Blumenstingl Acked-by: Arnd Bergmann Reviewed-by: Justin Stitt Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240224114023.85535-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/nvmem/meson-efuse.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c index d6b533497ce1a..ba2714bef8d0e 100644 --- a/drivers/nvmem/meson-efuse.c +++ b/drivers/nvmem/meson-efuse.c @@ -47,7 +47,6 @@ static int meson_efuse_probe(struct platform_device *pdev) struct nvmem_config *econfig; struct clk *clk; unsigned int size; - int ret; sm_np = of_parse_phandle(pdev->dev.of_node, "secure-monitor", 0); if (!sm_np) { @@ -60,27 +59,9 @@ static int meson_efuse_probe(struct platform_device *pdev) if (!fw) return -EPROBE_DEFER; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get efuse gate"); - return ret; - } - - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "failed to enable gate"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - clk); - if (ret) { - dev_err(dev, "failed to add disable callback"); - return ret; - } + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to get efuse gate"); if (meson_sm_call(fw, SM_EFUSE_USER_MAX, &size, 0, 0, 0, 0, 0) < 0) { dev_err(dev, "failed to get max user"); -- GitLab From b4139fe933df78215b8d60b8c4f703bd7553edc2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 24 Feb 2024 11:41:37 +0000 Subject: [PATCH 1205/1418] slimbus: core: Remove usage of the deprecated ida_simple_xx() API [ Upstream commit 89ffa4cccec54467446f141a79b9e36893079fb8 ] ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_range() is inclusive. So change this change allows one more device. Previously address 0xFE was never used. Fixes: 46a2bb5a7f7e ("slimbus: core: Add slim controllers support") Cc: Stable@vger.kernel.org Signed-off-by: Christophe JAILLET Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240224114137.85781-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/slimbus/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c index 219483b79c09c..37fd655994ef3 100644 --- a/drivers/slimbus/core.c +++ b/drivers/slimbus/core.c @@ -436,8 +436,8 @@ static int slim_device_alloc_laddr(struct slim_device *sbdev, if (ret < 0) goto err; } else if (report_present) { - ret = ida_simple_get(&ctrl->laddr_ida, - 0, SLIM_LA_MANAGER - 1, GFP_KERNEL); + ret = ida_alloc_max(&ctrl->laddr_ida, + SLIM_LA_MANAGER - 1, GFP_KERNEL); if (ret < 0) goto err; -- GitLab From 9cb3ace502385ef33581e3f21c749b768dec5070 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Thu, 7 Mar 2024 11:03:27 +0800 Subject: [PATCH 1206/1418] phy: tegra: xusb: Add API to retrieve the port number of phy [ Upstream commit d843f031d9e90462253015bc0bd9e3852d206bf2 ] This patch introduces a new API, tegra_xusb_padctl_get_port_number, to the Tegra XUSB Pad Controller driver. This API is used to identify the USB port that is associated with a given PHY. The function takes a PHY pointer for either a USB2 PHY or USB3 PHY as input and returns the corresponding port number. If the PHY pointer is invalid, it returns -ENODEV. Cc: stable@vger.kernel.org Signed-off-by: Wayne Chang Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20240307030328.1487748-2-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/phy/tegra/xusb.c | 13 +++++++++++++ include/linux/phy/tegra/xusb.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 4d5b4071d47d5..dc22b1dd2c8ba 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -1518,6 +1518,19 @@ int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, } EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_usb3_companion); +int tegra_xusb_padctl_get_port_number(struct phy *phy) +{ + struct tegra_xusb_lane *lane; + + if (!phy) + return -ENODEV; + + lane = phy_get_drvdata(phy); + + return lane->index; +} +EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_port_number); + MODULE_AUTHOR("Thierry Reding "); MODULE_DESCRIPTION("Tegra XUSB Pad Controller driver"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 70998e6dd6fdc..6ca51e0080ec0 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); -- GitLab From bf1eef7fc0f32d476e32166a2e06a45e172eb1c7 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Thu, 7 Mar 2024 11:03:28 +0800 Subject: [PATCH 1207/1418] usb: gadget: tegra-xudc: Fix USB3 PHY retrieval logic [ Upstream commit 84fa943d93c31ee978355e6c6c69592dae3c9f59 ] This commit resolves an issue in the tegra-xudc USB gadget driver that incorrectly fetched USB3 PHY instances. The problem stemmed from the assumption of a one-to-one correspondence between USB2 and USB3 PHY names and their association with physical USB ports in the device tree. Previously, the driver associated USB3 PHY names directly with the USB3 instance number, leading to mismatches when mapping the physical USB ports. For instance, if using USB3-1 PHY, the driver expect the corresponding PHY name as 'usb3-1'. However, the physical USB ports in the device tree were designated as USB2-0 and USB3-0 as we only have one device controller, causing a misalignment. This commit rectifies the issue by adjusting the PHY naming logic. Now, the driver correctly correlates the USB2 and USB3 PHY instances, allowing the USB2-0 and USB3-1 PHYs to form a physical USB port pair while accurately reflecting their configuration in the device tree by naming them USB2-0 and USB3-0, respectively. The change ensures that the PHY and PHY names align appropriately, resolving the mismatch between physical USB ports and their associated names in the device tree. Fixes: b4e19931c98a ("usb: gadget: tegra-xudc: Support multiple device modes") Cc: stable@vger.kernel.org Signed-off-by: Wayne Chang Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20240307030328.1487748-3-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/tegra-xudc.c | 39 ++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index a8cadc45c65aa..fd7a9535973ed 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3486,8 +3486,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc) static int tegra_xudc_phy_get(struct tegra_xudc *xudc) { - int err = 0, usb3; - unsigned int i; + int err = 0, usb3_companion_port; + unsigned int i, j; xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys, sizeof(*xudc->utmi_phy), GFP_KERNEL); @@ -3515,7 +3515,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) if (IS_ERR(xudc->utmi_phy[i])) { err = PTR_ERR(xudc->utmi_phy[i]); dev_err_probe(xudc->dev, err, - "failed to get usb2-%d PHY\n", i); + "failed to get PHY for phy-name usb2-%d\n", i); goto clean_up; } else if (xudc->utmi_phy[i]) { /* Get usb-phy, if utmi phy is available */ @@ -3534,19 +3534,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) } /* Get USB3 phy */ - usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); - if (usb3 < 0) + usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); + if (usb3_companion_port < 0) continue; - snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3); - xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); - if (IS_ERR(xudc->usb3_phy[i])) { - err = PTR_ERR(xudc->usb3_phy[i]); - dev_err_probe(xudc->dev, err, - "failed to get usb3-%d PHY\n", usb3); - goto clean_up; - } else if (xudc->usb3_phy[i]) - dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3); + for (j = 0; j < xudc->soc->num_phys; j++) { + snprintf(phy_name, sizeof(phy_name), "usb3-%d", j); + xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); + if (IS_ERR(xudc->usb3_phy[i])) { + err = PTR_ERR(xudc->usb3_phy[i]); + dev_err_probe(xudc->dev, err, + "failed to get PHY for phy-name usb3-%d\n", j); + goto clean_up; + } else if (xudc->usb3_phy[i]) { + int usb2_port = + tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]); + int usb3_port = + tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]); + if (usb3_port == usb3_companion_port) { + dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n", + usb2_port, usb3_port, i); + break; + } + } + } } return err; -- GitLab From 768625f84b2a6479d4e8f0bd599c53b1d5a35347 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 4 Feb 2024 16:57:36 +0100 Subject: [PATCH 1208/1418] speakup: Fix 8bit characters from direct synth [ Upstream commit b6c8dafc9d86eb77e502bb018ec4105e8d2fbf78 ] When userland echoes 8bit characters to /dev/synth with e.g. echo -e '\xe9' > /dev/synth synth_write would get characters beyond 0x7f, and thus negative when char is signed. When given to synth_buffer_add which takes a u16, this would sign-extend and produce a U+ffxy character rather than U+xy. Users thus get garbled text instead of accents in their output. Let's fix this by making sure that we read unsigned characters. Signed-off-by: Samuel Thibault Fixes: 89fc2ae80bb1 ("speakup: extend synth buffer to 16bit unicode characters") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240204155736.2oh4ot7tiaa2wpbh@begin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/accessibility/speakup/synth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c index eea2a2fa4f015..45f9061031338 100644 --- a/drivers/accessibility/speakup/synth.c +++ b/drivers/accessibility/speakup/synth.c @@ -208,8 +208,10 @@ void spk_do_flush(void) wake_up_process(speakup_task); } -void synth_write(const char *buf, size_t count) +void synth_write(const char *_buf, size_t count) { + const unsigned char *buf = (const unsigned char *) _buf; + while (count--) synth_buffer_add(*buf++); synth_start(); -- GitLab From 3a342fa31840894b28d23dc29f0eaee2eb6862e0 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 12 Feb 2024 13:01:35 +0100 Subject: [PATCH 1209/1418] PCI/AER: Block runtime suspend when handling errors [ Upstream commit 002bf2fbc00e5c4b95fb167287e2ae7d1973281e ] PM runtime can be done simultaneously with AER error handling. Avoid that by using pm_runtime_get_sync() before and pm_runtime_put() after reset in pcie_do_recovery() for all recovering devices. pm_runtime_get_sync() will increase dev->power.usage_count counter to prevent any possible future request to runtime suspend a device. It will also resume a device, if it was previously in D3hot state. I tested with igc device by doing simultaneous aer_inject and rpm suspend/resume via /sys/bus/pci/devices/PCI_ID/power/control and can reproduce: igc 0000:02:00.0: not ready 65535ms after bus reset; giving up pcieport 0000:00:1c.2: AER: Root Port link has been reset (-25) pcieport 0000:00:1c.2: AER: subordinate device reset failed pcieport 0000:00:1c.2: AER: device recovery failed igc 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible The problem disappears when this patch is applied. Link: https://lore.kernel.org/r/20240212120135.146068-1-stanislaw.gruszka@linux.intel.com Signed-off-by: Stanislaw Gruszka Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Rafael J. Wysocki Cc: Signed-off-by: Sasha Levin --- drivers/pci/pcie/err.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 59c90d04a609a..705893b5f7b09 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -13,6 +13,7 @@ #define dev_fmt(fmt) "AER: " fmt #include +#include #include #include #include @@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev, return 0; } +static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data) +{ + pm_runtime_get_sync(&pdev->dev); + return 0; +} + +static int pci_pm_runtime_put(struct pci_dev *pdev, void *data) +{ + pm_runtime_put(&pdev->dev); + return 0; +} + static int report_frozen_detected(struct pci_dev *dev, void *data) { return report_error_detected(dev, pci_channel_io_frozen, data); @@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); + pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL); + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); @@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); } + + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_info(bridge, "device recovery successful\n"); return status; failed: + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ -- GitLab From 220778af6c7285dbe7a989141074433ea66b3f98 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Mar 2024 17:48:03 -0700 Subject: [PATCH 1210/1418] io_uring/net: correctly handle multishot recvmsg retry setup [ Upstream commit deaef31bc1ec7966698a427da8c161930830e1cf ] If we loop for multishot receive on the initial attempt, and then abort later on to wait for more, we miss a case where we should be copying the io_async_msghdr from the stack to stable storage. This leads to the next retry potentially failing, if the application had the msghdr on the stack. Cc: stable@vger.kernel.org Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 0d4ee3d738fbf..b1b564c04d1e7 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -876,7 +876,8 @@ retry_multishot: kfree(kmsg->free_iov); io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; - } + } else if (ret == -EAGAIN) + return io_setup_async_msg(req, kmsg, issue_flags); return ret; } -- GitLab From bda2265e6d1c44fc974498bd217fd8a2bddf2d9c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2023 14:45:20 -0600 Subject: [PATCH 1211/1418] sparc: Explicitly include correct DT includes [ Upstream commit 263291fa44ff0909b5b7c43ff40babc1c43362f2 ] The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it was merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Acked-by: Sam Ravnborg Link: https://lore.kernel.org/all/20230718143211.1066810-1-robh@kernel.org/ Signed-off-by: Rob Herring Stable-dep-of: 91d3ff922c34 ("sparc32: Fix parport build with sparc32") Signed-off-by: Sasha Levin --- arch/sparc/crypto/crop_devid.c | 2 +- arch/sparc/include/asm/floppy_32.h | 2 +- arch/sparc/include/asm/floppy_64.h | 2 +- arch/sparc/include/asm/parport.h | 3 ++- arch/sparc/kernel/apc.c | 2 +- arch/sparc/kernel/auxio_32.c | 1 - arch/sparc/kernel/auxio_64.c | 3 ++- arch/sparc/kernel/central.c | 2 +- arch/sparc/kernel/chmc.c | 3 ++- arch/sparc/kernel/ioport.c | 2 +- arch/sparc/kernel/leon_kernel.c | 2 -- arch/sparc/kernel/leon_pci.c | 3 ++- arch/sparc/kernel/leon_pci_grpci1.c | 3 ++- arch/sparc/kernel/leon_pci_grpci2.c | 4 +++- arch/sparc/kernel/of_device_32.c | 2 +- arch/sparc/kernel/of_device_64.c | 4 ++-- arch/sparc/kernel/of_device_common.c | 4 ++-- arch/sparc/kernel/pci.c | 3 ++- arch/sparc/kernel/pci_common.c | 3 ++- arch/sparc/kernel/pci_fire.c | 3 ++- arch/sparc/kernel/pci_impl.h | 1 - arch/sparc/kernel/pci_msi.c | 2 ++ arch/sparc/kernel/pci_psycho.c | 4 +++- arch/sparc/kernel/pci_sun4v.c | 3 ++- arch/sparc/kernel/pmc.c | 2 +- arch/sparc/kernel/power.c | 3 ++- arch/sparc/kernel/prom_irqtrans.c | 1 + arch/sparc/kernel/psycho_common.c | 1 + arch/sparc/kernel/sbus.c | 3 ++- arch/sparc/kernel/time_32.c | 1 - arch/sparc/mm/io-unit.c | 3 ++- arch/sparc/mm/iommu.c | 5 +++-- 32 files changed, 49 insertions(+), 33 deletions(-) diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c index 83fc4536dcd57..93f4e0fdd38c1 100644 --- a/arch/sparc/crypto/crop_devid.c +++ b/arch/sparc/crypto/crop_devid.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include -#include /* This is a dummy device table linked into all of the crypto * opcode drivers. It serves to trigger the module autoloading diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index e10ab9ad3097d..836f6575aa1d7 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -8,7 +8,7 @@ #define __ASM_SPARC_FLOPPY_H #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 070c8c1f5c8fd..6efeb24b0a92c 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -11,7 +11,7 @@ #define __ASM_SPARC64_FLOPPY_H #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 03b27090c0c8c..0a7ffcfd59cda 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -7,7 +7,8 @@ #ifndef _ASM_SPARC64_PARPORT_H #define _ASM_SPARC64_PARPORT_H 1 -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index ecd05bc0a1045..d44725d37e30f 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/auxio_32.c b/arch/sparc/kernel/auxio_32.c index a32d588174f2f..989860e890c4f 100644 --- a/arch/sparc/kernel/auxio_32.c +++ b/arch/sparc/kernel/auxio_32.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index 774a82b0c649f..2a2800d213256 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c index 23f8838dd96e3..a1a6485c91831 100644 --- a/arch/sparc/kernel/central.c +++ b/arch/sparc/kernel/central.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index 6ff43df740e08..d5fad5fb04c1d 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 4e4f3d3263e46..e5a327799e574 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 39229940d725d..4c61da491fee1 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include #include diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index e5e5ff6b9a5c5..3a73bc466f95d 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -7,7 +7,8 @@ * Code is partially derived from pcic.c */ -#include +#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index c32590bdd3120..b2b639bee0684 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -13,10 +13,11 @@ * Contributors: Daniel Hellstrom */ -#include #include #include +#include #include +#include #include #include diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index dd06abc61657f..ac2acd62a24ec 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -6,12 +6,14 @@ * */ -#include #include #include #include #include #include +#include +#include + #include #include #include diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 4ebf51e6e78ec..9ac6853b34c1b 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -7,8 +7,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5a9f86b1d4e7e..a8ccd7260fe7f 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include @@ -9,8 +8,9 @@ #include #include #include -#include +#include #include +#include #include #include "of_device_common.h" diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c index e717a56efc5d3..a09724381bd40 100644 --- a/arch/sparc/kernel/of_device_common.c +++ b/arch/sparc/kernel/of_device_common.c @@ -1,15 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-only #include #include -#include #include #include #include #include +#include #include #include -#include #include +#include #include "of_device_common.h" diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index cb1ef25116e94..5637b37ba9114 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -20,8 +20,9 @@ #include #include #include -#include +#include #include +#include #include #include diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index 4759ccd542fe6..5eeec9ad68457 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -8,7 +8,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c index 0ca08d455e805..0b91bde80fdc5 100644 --- a/arch/sparc/kernel/pci_fire.c +++ b/arch/sparc/kernel/pci_fire.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h index 4e3d15189fa95..f31761f517575 100644 --- a/arch/sparc/kernel/pci_impl.h +++ b/arch/sparc/kernel/pci_impl.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 9ed11985768e1..fc7402948b7bc 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c index f413371da3871..1efc98305ec76 100644 --- a/arch/sparc/kernel/pci_psycho.c +++ b/arch/sparc/kernel/pci_psycho.c @@ -13,7 +13,9 @@ #include #include #include -#include +#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 3844809718052..0ddef827e0f99 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index b5c1eb33b9518..69a0206e56f01 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c index d941875dd7186..2f6c909e1755d 100644 --- a/arch/sparc/kernel/power.c +++ b/arch/sparc/kernel/power.c @@ -9,7 +9,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/prom_irqtrans.c b/arch/sparc/kernel/prom_irqtrans.c index 28aff1c524b58..426bd08cb2ab1 100644 --- a/arch/sparc/kernel/prom_irqtrans.c +++ b/arch/sparc/kernel/prom_irqtrans.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c index e90bcb6bad7fc..5ee74b4c0cf40 100644 --- a/arch/sparc/kernel/psycho_common.c +++ b/arch/sparc/kernel/psycho_common.c @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c index 32141e1006c4a..0bababf6f2bcd 100644 --- a/arch/sparc/kernel/sbus.c +++ b/arch/sparc/kernel/sbus.c @@ -14,7 +14,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 8a08830e4a653..79934beba03a6 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index bf3e6d2fe5d94..3afbbe5fba46b 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 9e3f6933ca13f..14e178bfe33ab 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -7,14 +7,15 @@ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ - + #include #include #include #include #include #include -#include +#include +#include #include #include -- GitLab From f3ed58d7b3566d7c74df36dea79719b5aaf0df76 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 24 Feb 2024 18:42:27 +0100 Subject: [PATCH 1212/1418] sparc32: Fix parport build with sparc32 [ Upstream commit 91d3ff922c346d6d8cb8de5ff8d504fe0ca9e17e ] include/asm/parport.h is sparc64 specific. Rename it to parport_64.h and use the generic version for sparc32. This fixed all{mod,yes}config build errors like: parport_pc.c:(.text):undefined-reference-to-ebus_dma_enable parport_pc.c:(.text):undefined-reference-to-ebus_dma_irq_enable parport_pc.c:(.text):undefined-reference-to-ebus_dma_register The errors occur as the sparc32 build references sparc64 symbols. Signed-off-by: Sam Ravnborg Cc: "David S. Miller" Cc: Andreas Larsson Cc: Randy Dunlap Cc: Maciej W. Rozycki Closes: https://lore.kernel.org/r/20230406160548.25721-1-rdunlap@infradead.org/ Fixes: 66bcd06099bb ("parport_pc: Also enable driver for PCI systems") Cc: stable@vger.kernel.org # v5.18+ Tested-by: Randy Dunlap # build-tested Reviewed-by: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240224-sam-fix-sparc32-all-builds-v2-6-1f186603c5c4@ravnborg.org Signed-off-by: Sasha Levin --- arch/sparc/include/asm/parport.h | 259 +--------------------------- arch/sparc/include/asm/parport_64.h | 256 +++++++++++++++++++++++++++ 2 files changed, 263 insertions(+), 252 deletions(-) create mode 100644 arch/sparc/include/asm/parport_64.h diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 0a7ffcfd59cda..e2eed8f97665f 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -1,256 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* parport.h: sparc64 specific parport initialization and dma. - * - * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) - */ +#ifndef ___ASM_SPARC_PARPORT_H +#define ___ASM_SPARC_PARPORT_H -#ifndef _ASM_SPARC64_PARPORT_H -#define _ASM_SPARC64_PARPORT_H 1 - -#include -#include - -#include -#include -#include - -#define PARPORT_PC_MAX_PORTS PARPORT_MAX - -/* - * While sparc64 doesn't have an ISA DMA API, we provide something that looks - * close enough to make parport_pc happy - */ -#define HAS_DMA - -#ifdef CONFIG_PARPORT_PC_FIFO -static DEFINE_SPINLOCK(dma_spin_lock); - -#define claim_dma_lock() \ -({ unsigned long flags; \ - spin_lock_irqsave(&dma_spin_lock, flags); \ - flags; \ -}) - -#define release_dma_lock(__flags) \ - spin_unlock_irqrestore(&dma_spin_lock, __flags); +#if defined(__sparc__) && defined(__arch64__) +#include +#else +#include +#endif #endif -static struct sparc_ebus_info { - struct ebus_dma_info info; - unsigned int addr; - unsigned int count; - int lock; - - struct parport *port; -} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS]; - -static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS); - -static inline int request_dma(unsigned int dmanr, const char *device_id) -{ - if (dmanr >= PARPORT_PC_MAX_PORTS) - return -EINVAL; - if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0) - return -EBUSY; - return 0; -} - -static inline void free_dma(unsigned int dmanr) -{ - if (dmanr >= PARPORT_PC_MAX_PORTS) { - printk(KERN_WARNING "Trying to free DMA%d\n", dmanr); - return; - } - if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) { - printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr); - return; - } -} - -static inline void enable_dma(unsigned int dmanr) -{ - ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1); - - if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info, - sparc_ebus_dmas[dmanr].addr, - sparc_ebus_dmas[dmanr].count)) - BUG(); -} - -static inline void disable_dma(unsigned int dmanr) -{ - ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0); -} - -static inline void clear_dma_ff(unsigned int dmanr) -{ - /* nothing */ -} - -static inline void set_dma_mode(unsigned int dmanr, char mode) -{ - ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE)); -} - -static inline void set_dma_addr(unsigned int dmanr, unsigned int addr) -{ - sparc_ebus_dmas[dmanr].addr = addr; -} - -static inline void set_dma_count(unsigned int dmanr, unsigned int count) -{ - sparc_ebus_dmas[dmanr].count = count; -} - -static inline unsigned int get_dma_residue(unsigned int dmanr) -{ - return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info); -} - -static int ecpp_probe(struct platform_device *op) -{ - unsigned long base = op->resource[0].start; - unsigned long config = op->resource[1].start; - unsigned long d_base = op->resource[2].start; - unsigned long d_len; - struct device_node *parent; - struct parport *p; - int slot, err; - - parent = op->dev.of_node->parent; - if (of_node_name_eq(parent, "dma")) { - p = parport_pc_probe_port(base, base + 0x400, - op->archdata.irqs[0], PARPORT_DMA_NOFIFO, - op->dev.parent->parent, 0); - if (!p) - return -ENOMEM; - dev_set_drvdata(&op->dev, p); - return 0; - } - - for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) { - if (!test_and_set_bit(slot, dma_slot_map)) - break; - } - err = -ENODEV; - if (slot >= PARPORT_PC_MAX_PORTS) - goto out_err; - - spin_lock_init(&sparc_ebus_dmas[slot].info.lock); - - d_len = (op->resource[2].end - d_base) + 1UL; - sparc_ebus_dmas[slot].info.regs = - of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA"); - - if (!sparc_ebus_dmas[slot].info.regs) - goto out_clear_map; - - sparc_ebus_dmas[slot].info.flags = 0; - sparc_ebus_dmas[slot].info.callback = NULL; - sparc_ebus_dmas[slot].info.client_cookie = NULL; - sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; - strcpy(sparc_ebus_dmas[slot].info.name, "parport"); - if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) - goto out_unmap_regs; - - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1); - - /* Configure IRQ to Push Pull, Level Low */ - /* Enable ECP, set bit 2 of the CTR first */ - outb(0x04, base + 0x02); - ns87303_modify(config, PCR, - PCR_EPP_ENABLE | - PCR_IRQ_ODRAIN, - PCR_ECP_ENABLE | - PCR_ECP_CLK_ENA | - PCR_IRQ_POLAR); - - /* CTR bit 5 controls direction of port */ - ns87303_modify(config, PTR, - 0, PTR_LPT_REG_DIR); - - p = parport_pc_probe_port(base, base + 0x400, - op->archdata.irqs[0], - slot, - op->dev.parent, - 0); - err = -ENOMEM; - if (!p) - goto out_disable_irq; - - dev_set_drvdata(&op->dev, p); - - return 0; - -out_disable_irq: - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); - ebus_dma_unregister(&sparc_ebus_dmas[slot].info); - -out_unmap_regs: - of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len); - -out_clear_map: - clear_bit(slot, dma_slot_map); - -out_err: - return err; -} - -static int ecpp_remove(struct platform_device *op) -{ - struct parport *p = dev_get_drvdata(&op->dev); - int slot = p->dma; - - parport_pc_unregister_port(p); - - if (slot != PARPORT_DMA_NOFIFO) { - unsigned long d_base = op->resource[2].start; - unsigned long d_len; - - d_len = (op->resource[2].end - d_base) + 1UL; - - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); - ebus_dma_unregister(&sparc_ebus_dmas[slot].info); - of_iounmap(&op->resource[2], - sparc_ebus_dmas[slot].info.regs, - d_len); - clear_bit(slot, dma_slot_map); - } - - return 0; -} - -static const struct of_device_id ecpp_match[] = { - { - .name = "ecpp", - }, - { - .name = "parallel", - .compatible = "ecpp", - }, - { - .name = "parallel", - .compatible = "ns87317-ecpp", - }, - { - .name = "parallel", - .compatible = "pnpALI,1533,3", - }, - {}, -}; - -static struct platform_driver ecpp_driver = { - .driver = { - .name = "ecpp", - .of_match_table = ecpp_match, - }, - .probe = ecpp_probe, - .remove = ecpp_remove, -}; - -static int parport_pc_find_nonpci_ports(int autoirq, int autodma) -{ - return platform_driver_register(&ecpp_driver); -} - -#endif /* !(_ASM_SPARC64_PARPORT_H */ diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h new file mode 100644 index 0000000000000..0a7ffcfd59cda --- /dev/null +++ b/arch/sparc/include/asm/parport_64.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* parport.h: sparc64 specific parport initialization and dma. + * + * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) + */ + +#ifndef _ASM_SPARC64_PARPORT_H +#define _ASM_SPARC64_PARPORT_H 1 + +#include +#include + +#include +#include +#include + +#define PARPORT_PC_MAX_PORTS PARPORT_MAX + +/* + * While sparc64 doesn't have an ISA DMA API, we provide something that looks + * close enough to make parport_pc happy + */ +#define HAS_DMA + +#ifdef CONFIG_PARPORT_PC_FIFO +static DEFINE_SPINLOCK(dma_spin_lock); + +#define claim_dma_lock() \ +({ unsigned long flags; \ + spin_lock_irqsave(&dma_spin_lock, flags); \ + flags; \ +}) + +#define release_dma_lock(__flags) \ + spin_unlock_irqrestore(&dma_spin_lock, __flags); +#endif + +static struct sparc_ebus_info { + struct ebus_dma_info info; + unsigned int addr; + unsigned int count; + int lock; + + struct parport *port; +} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS]; + +static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS); + +static inline int request_dma(unsigned int dmanr, const char *device_id) +{ + if (dmanr >= PARPORT_PC_MAX_PORTS) + return -EINVAL; + if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0) + return -EBUSY; + return 0; +} + +static inline void free_dma(unsigned int dmanr) +{ + if (dmanr >= PARPORT_PC_MAX_PORTS) { + printk(KERN_WARNING "Trying to free DMA%d\n", dmanr); + return; + } + if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) { + printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr); + return; + } +} + +static inline void enable_dma(unsigned int dmanr) +{ + ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1); + + if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info, + sparc_ebus_dmas[dmanr].addr, + sparc_ebus_dmas[dmanr].count)) + BUG(); +} + +static inline void disable_dma(unsigned int dmanr) +{ + ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0); +} + +static inline void clear_dma_ff(unsigned int dmanr) +{ + /* nothing */ +} + +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE)); +} + +static inline void set_dma_addr(unsigned int dmanr, unsigned int addr) +{ + sparc_ebus_dmas[dmanr].addr = addr; +} + +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + sparc_ebus_dmas[dmanr].count = count; +} + +static inline unsigned int get_dma_residue(unsigned int dmanr) +{ + return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info); +} + +static int ecpp_probe(struct platform_device *op) +{ + unsigned long base = op->resource[0].start; + unsigned long config = op->resource[1].start; + unsigned long d_base = op->resource[2].start; + unsigned long d_len; + struct device_node *parent; + struct parport *p; + int slot, err; + + parent = op->dev.of_node->parent; + if (of_node_name_eq(parent, "dma")) { + p = parport_pc_probe_port(base, base + 0x400, + op->archdata.irqs[0], PARPORT_DMA_NOFIFO, + op->dev.parent->parent, 0); + if (!p) + return -ENOMEM; + dev_set_drvdata(&op->dev, p); + return 0; + } + + for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) { + if (!test_and_set_bit(slot, dma_slot_map)) + break; + } + err = -ENODEV; + if (slot >= PARPORT_PC_MAX_PORTS) + goto out_err; + + spin_lock_init(&sparc_ebus_dmas[slot].info.lock); + + d_len = (op->resource[2].end - d_base) + 1UL; + sparc_ebus_dmas[slot].info.regs = + of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA"); + + if (!sparc_ebus_dmas[slot].info.regs) + goto out_clear_map; + + sparc_ebus_dmas[slot].info.flags = 0; + sparc_ebus_dmas[slot].info.callback = NULL; + sparc_ebus_dmas[slot].info.client_cookie = NULL; + sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; + strcpy(sparc_ebus_dmas[slot].info.name, "parport"); + if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) + goto out_unmap_regs; + + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1); + + /* Configure IRQ to Push Pull, Level Low */ + /* Enable ECP, set bit 2 of the CTR first */ + outb(0x04, base + 0x02); + ns87303_modify(config, PCR, + PCR_EPP_ENABLE | + PCR_IRQ_ODRAIN, + PCR_ECP_ENABLE | + PCR_ECP_CLK_ENA | + PCR_IRQ_POLAR); + + /* CTR bit 5 controls direction of port */ + ns87303_modify(config, PTR, + 0, PTR_LPT_REG_DIR); + + p = parport_pc_probe_port(base, base + 0x400, + op->archdata.irqs[0], + slot, + op->dev.parent, + 0); + err = -ENOMEM; + if (!p) + goto out_disable_irq; + + dev_set_drvdata(&op->dev, p); + + return 0; + +out_disable_irq: + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); + ebus_dma_unregister(&sparc_ebus_dmas[slot].info); + +out_unmap_regs: + of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len); + +out_clear_map: + clear_bit(slot, dma_slot_map); + +out_err: + return err; +} + +static int ecpp_remove(struct platform_device *op) +{ + struct parport *p = dev_get_drvdata(&op->dev); + int slot = p->dma; + + parport_pc_unregister_port(p); + + if (slot != PARPORT_DMA_NOFIFO) { + unsigned long d_base = op->resource[2].start; + unsigned long d_len; + + d_len = (op->resource[2].end - d_base) + 1UL; + + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); + ebus_dma_unregister(&sparc_ebus_dmas[slot].info); + of_iounmap(&op->resource[2], + sparc_ebus_dmas[slot].info.regs, + d_len); + clear_bit(slot, dma_slot_map); + } + + return 0; +} + +static const struct of_device_id ecpp_match[] = { + { + .name = "ecpp", + }, + { + .name = "parallel", + .compatible = "ecpp", + }, + { + .name = "parallel", + .compatible = "ns87317-ecpp", + }, + { + .name = "parallel", + .compatible = "pnpALI,1533,3", + }, + {}, +}; + +static struct platform_driver ecpp_driver = { + .driver = { + .name = "ecpp", + .of_match_table = ecpp_match, + }, + .probe = ecpp_probe, + .remove = ecpp_remove, +}; + +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return platform_driver_register(&ecpp_driver); +} + +#endif /* !(_ASM_SPARC64_PARPORT_H */ -- GitLab From 3abc2d160ed8213948b147295d77d44a22c88fa3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2024 11:49:57 -0500 Subject: [PATCH 1213/1418] nfs: fix UAF in direct writes [ Upstream commit 17f46b803d4f23c66cacce81db35fef3adb8f2af ] In production we have been hitting the following warning consistently ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 17 PID: 1800359 at lib/refcount.c:28 refcount_warn_saturate+0x9c/0xe0 Workqueue: nfsiod nfs_direct_write_schedule_work [nfs] RIP: 0010:refcount_warn_saturate+0x9c/0xe0 PKRU: 55555554 Call Trace: ? __warn+0x9f/0x130 ? refcount_warn_saturate+0x9c/0xe0 ? report_bug+0xcc/0x150 ? handle_bug+0x3d/0x70 ? exc_invalid_op+0x16/0x40 ? asm_exc_invalid_op+0x16/0x20 ? refcount_warn_saturate+0x9c/0xe0 nfs_direct_write_schedule_work+0x237/0x250 [nfs] process_one_work+0x12f/0x4a0 worker_thread+0x14e/0x3b0 ? ZSTD_getCParams_internal+0x220/0x220 kthread+0xdc/0x120 ? __btf_name_valid+0xa0/0xa0 ret_from_fork+0x1f/0x30 This is because we're completing the nfs_direct_request twice in a row. The source of this is when we have our commit requests to submit, we process them and send them off, and then in the completion path for the commit requests we have if (nfs_commit_end(cinfo.mds)) nfs_direct_write_complete(dreq); However since we're submitting asynchronous requests we sometimes have one that completes before we submit the next one, so we end up calling complete on the nfs_direct_request twice. The only other place we use nfs_generic_commit_list() is in __nfs_commit_inode, which wraps this call in a nfs_commit_begin(); nfs_commit_end(); Which is a common pattern for this style of completion handling, one that is also repeated in the direct code with get_dreq()/put_dreq() calls around where we process events as well as in the completion paths. Fix this by using the same pattern for the commit requests. Before with my 200 node rocksdb stress running this warning would pop every 10ish minutes. With my patch the stress test has been running for several hours without popping. Signed-off-by: Josef Bacik Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/direct.c | 11 +++++++++-- fs/nfs/write.c | 2 +- include/linux/nfs_fs.h | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8fdb65e1b14a3..b555efca01d20 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -647,10 +647,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) LIST_HEAD(mds_list); nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_commit_begin(cinfo.mds); nfs_scan_commit(dreq->inode, &mds_list, &cinfo); res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); - if (res < 0) /* res == -ENOMEM */ - nfs_direct_write_reschedule(dreq); + if (res < 0) { /* res == -ENOMEM */ + spin_lock(&dreq->lock); + if (dreq->flags == 0) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + } + if (nfs_commit_end(cinfo.mds)) + nfs_direct_write_complete(dreq); } static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6a06066684172..8e21caae4cae2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1656,7 +1656,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo) !atomic_read(&cinfo->rpcs_out)); } -static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) { atomic_inc(&cinfo->rpcs_out); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7931fa4725612..ac7d799d9d387 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -582,6 +582,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline bool nfs_have_writebacks(const struct inode *inode) -- GitLab From 32eaf695d3c7346fad54be26d21de90fec2f9a81 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 5 Mar 2024 15:12:47 -0700 Subject: [PATCH 1214/1418] kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1 [ Upstream commit 75b5ab134bb5f657ef7979a59106dce0657e8d87 ] Clang enables -Wenum-enum-conversion and -Wenum-compare-conditional under -Wenum-conversion. A recent change in Clang strengthened these warnings and they appear frequently in common builds, primarily due to several instances in common headers but there are quite a few drivers that have individual instances as well. include/linux/vmstat.h:508:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 508 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 509 | item]; | ~~~~ drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:955:24: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional] 955 | flags |= is_new_rate ? IWL_MAC_BEACON_CCK | ^ ~~~~~~~~~~~~~~~~~~ 956 | : IWL_MAC_BEACON_CCK_V1; | ~~~~~~~~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:1120:21: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional] 1120 | 0) > 10 ? | ^ 1121 | IWL_MAC_BEACON_FILS : | ~~~~~~~~~~~~~~~~~~~ 1122 | IWL_MAC_BEACON_FILS_V1; | ~~~~~~~~~~~~~~~~~~~~~~ Doing arithmetic between or returning two different types of enums could be a bug, so each of the instance of the warning needs to be evaluated. Unfortunately, as mentioned above, there are many instances of this warning in many different configurations, which can break the build when CONFIG_WERROR is enabled. To avoid introducing new instances of the warnings while cleaning up the disruption for the majority of users, disable these warnings for the default build while leaving them on for W=1 builds. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2002 Link: https://github.com/llvm/llvm-project/commit/8c2ae42b3e1c6aa7c18f873edcebff7c0b45a37e Acked-by: Yonghong Song Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Makefile.extrawarn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6bbba36c59695..fa5ef41806882 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -65,6 +65,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) +KBUILD_CFLAGS += -Wno-enum-compare-conditional +KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif -- GitLab From a601e7a7fc8e9b80a78339d4d30335b2fde0ba21 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 16 Mar 2023 13:41:12 +0530 Subject: [PATCH 1215/1418] PCI: qcom: Rename qcom_pcie_config_sid_sm8250() to reflect IP version [ Upstream commit 1f70939871b260b52e9d1941f1cad740b7295c2c ] qcom_pcie_config_sid_sm8250() function no longer applies only to SM8250. So let's rename it to reflect the actual IP version and also move its definition to keep it sorted as per IP revisions. Link: https://lore.kernel.org/r/20230316081117.14288-15-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Lorenzo Pieralisi Stable-dep-of: bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly") Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-qcom.c | 143 ++++++++++++------------- 1 file changed, 71 insertions(+), 72 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 0ccd92faf078a..9202d2395b507 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1312,6 +1312,76 @@ static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie) regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies); } +static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie) +{ + /* iommu map structure */ + struct { + u32 bdf; + u32 phandle; + u32 smmu_sid; + u32 smmu_sid_len; + } *map; + void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N; + struct device *dev = pcie->pci->dev; + u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; + int i, nr_map, size = 0; + u32 smmu_sid_base; + + of_get_property(dev->of_node, "iommu-map", &size); + if (!size) + return 0; + + map = kzalloc(size, GFP_KERNEL); + if (!map) + return -ENOMEM; + + of_property_read_u32_array(dev->of_node, "iommu-map", (u32 *)map, + size / sizeof(u32)); + + nr_map = size / (sizeof(*map)); + + crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL); + + /* Registers need to be zero out first */ + memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32)); + + /* Extract the SMMU SID base from the first entry of iommu-map */ + smmu_sid_base = map[0].smmu_sid; + + /* Look for an available entry to hold the mapping */ + for (i = 0; i < nr_map; i++) { + __be16 bdf_be = cpu_to_be16(map[i].bdf); + u32 val; + u8 hash; + + hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), 0); + + val = readl(bdf_to_sid_base + hash * sizeof(u32)); + + /* If the register is already populated, look for next available entry */ + while (val) { + u8 current_hash = hash++; + u8 next_mask = 0xff; + + /* If NEXT field is NULL then update it with next hash */ + if (!(val & next_mask)) { + val |= (u32)hash; + writel(val, bdf_to_sid_base + current_hash * sizeof(u32)); + } + + val = readl(bdf_to_sid_base + hash * sizeof(u32)); + } + + /* BDF [31:16] | SID [15:8] | NEXT [7:0] */ + val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0; + writel(val, bdf_to_sid_base + hash * sizeof(u32)); + } + + kfree(map); + + return 0; +} + static int qcom_pcie_get_resources_2_9_0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_2_9_0 *res = &pcie->res.v2_9_0; @@ -1429,77 +1499,6 @@ static int qcom_pcie_link_up(struct dw_pcie *pci) return !!(val & PCI_EXP_LNKSTA_DLLLA); } -static int qcom_pcie_config_sid_sm8250(struct qcom_pcie *pcie) -{ - /* iommu map structure */ - struct { - u32 bdf; - u32 phandle; - u32 smmu_sid; - u32 smmu_sid_len; - } *map; - void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N; - struct device *dev = pcie->pci->dev; - u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; - int i, nr_map, size = 0; - u32 smmu_sid_base; - - of_get_property(dev->of_node, "iommu-map", &size); - if (!size) - return 0; - - map = kzalloc(size, GFP_KERNEL); - if (!map) - return -ENOMEM; - - of_property_read_u32_array(dev->of_node, - "iommu-map", (u32 *)map, size / sizeof(u32)); - - nr_map = size / (sizeof(*map)); - - crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL); - - /* Registers need to be zero out first */ - memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32)); - - /* Extract the SMMU SID base from the first entry of iommu-map */ - smmu_sid_base = map[0].smmu_sid; - - /* Look for an available entry to hold the mapping */ - for (i = 0; i < nr_map; i++) { - __be16 bdf_be = cpu_to_be16(map[i].bdf); - u32 val; - u8 hash; - - hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), - 0); - - val = readl(bdf_to_sid_base + hash * sizeof(u32)); - - /* If the register is already populated, look for next available entry */ - while (val) { - u8 current_hash = hash++; - u8 next_mask = 0xff; - - /* If NEXT field is NULL then update it with next hash */ - if (!(val & next_mask)) { - val |= (u32)hash; - writel(val, bdf_to_sid_base + current_hash * sizeof(u32)); - } - - val = readl(bdf_to_sid_base + hash * sizeof(u32)); - } - - /* BDF [31:16] | SID [15:8] | NEXT [7:0] */ - val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0; - writel(val, bdf_to_sid_base + hash * sizeof(u32)); - } - - kfree(map); - - return 0; -} - static int qcom_pcie_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); @@ -1616,7 +1615,7 @@ static const struct qcom_pcie_ops ops_1_9_0 = { .init = qcom_pcie_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, - .config_sid = qcom_pcie_config_sid_sm8250, + .config_sid = qcom_pcie_config_sid_1_9_0, }; /* Qcom IP rev.: 2.9.0 Synopsys IP rev.: 5.00a */ -- GitLab From 04f3652bd0fdad4f3486db6de59aa01d23eb01cb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 7 Mar 2024 16:35:15 +0530 Subject: [PATCH 1216/1418] PCI: qcom: Enable BDF to SID translation properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bf79e33cdd89db498e00a6131e937259de5f2705 ] Qcom SoCs making use of ARM SMMU require BDF to SID translation table in the driver to properly map the SID for the PCIe devices based on their BDF identifier. This is currently achieved with the help of qcom_pcie_config_sid_1_9_0() function for SoCs supporting the 1_9_0 config. But With newer Qcom SoCs starting from SM8450, BDF to SID translation is set to bypass mode by default in hardware. Due to this, the translation table that is set in the qcom_pcie_config_sid_1_9_0() is essentially unused and the default SID is used for all endpoints in SoCs starting from SM8450. This is a security concern and also warrants swapping the DeviceID in DT while using the GIC ITS to handle MSIs from endpoints. The swapping is currently done like below in DT when using GIC ITS: /* * MSIs for BDF (1:0.0) only works with Device ID 0x5980. * Hence, the IDs are swapped. */ msi-map = <0x0 &gic_its 0x5981 0x1>, <0x100 &gic_its 0x5980 0x1>; Here, swapping of the DeviceIDs ensure that the endpoint with BDF (1:0.0) gets the DeviceID 0x5980 which is associated with the default SID as per the iommu mapping in DT. So MSIs were delivered with IDs swapped so far. But this also means the Root Port (0:0.0) won't receive any MSIs (for PME, AER etc...) So let's fix these issues by clearing the BDF to SID bypass mode for all SoCs making use of the 1_9_0 config. This allows the PCIe devices to use the correct SID, thus avoiding the DeviceID swapping hack in DT and also achieving the isolation between devices. Fixes: 4c9398822106 ("PCI: qcom: Add support for configuring BDF to SID mapping for SM8250") Link: https://lore.kernel.org/linux-pci/20240307-pci-bdf-sid-fix-v1-1-9423a7e2d63c@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org # 5.11 Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-qcom.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 9202d2395b507..0bad23ec53ee8 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -50,6 +50,7 @@ #define PARF_SLV_ADDR_SPACE_SIZE 0x358 #define PARF_DEVICE_TYPE 0x1000 #define PARF_BDF_TO_SID_TABLE_N 0x2000 +#define PARF_BDF_TO_SID_CFG 0x2c00 /* ELBI registers */ #define ELBI_SYS_CTRL 0x04 @@ -102,6 +103,9 @@ /* PARF_DEVICE_TYPE register fields */ #define DEVICE_TYPE_RC 0x4 +/* PARF_BDF_TO_SID_CFG fields */ +#define BDF_TO_SID_BYPASS BIT(0) + /* ELBI_SYS_CTRL register fields */ #define ELBI_SYS_CTRL_LT_ENABLE BIT(0) @@ -1326,11 +1330,17 @@ static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie) u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; int i, nr_map, size = 0; u32 smmu_sid_base; + u32 val; of_get_property(dev->of_node, "iommu-map", &size); if (!size) return 0; + /* Enable BDF to SID translation by disabling bypass mode (default) */ + val = readl(pcie->parf + PARF_BDF_TO_SID_CFG); + val &= ~BDF_TO_SID_BYPASS; + writel(val, pcie->parf + PARF_BDF_TO_SID_CFG); + map = kzalloc(size, GFP_KERNEL); if (!map) return -ENOMEM; -- GitLab From 3d863cf207974343457c45b5c2d2d3baabd83e26 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 7 Mar 2024 12:15:20 +0100 Subject: [PATCH 1217/1418] PCI: dwc: endpoint: Fix advertised resizable BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 72e34b8593e08a0ee759b7a038e0b178418ea6f8 ] The commit message in commit fc9a77040b04 ("PCI: designware-ep: Configure Resizable BAR cap to advertise the smallest size") claims that it modifies the Resizable BAR capability to only advertise support for 1 MB size BARs. However, the commit writes all zeroes to PCI_REBAR_CAP (the register which contains the possible BAR sizes that a BAR be resized to). According to the spec, it is illegal to not have a bit set in PCI_REBAR_CAP, and 1 MB is the smallest size allowed. Set bit 4 in PCI_REBAR_CAP, so that we actually advertise support for a 1 MB BAR size. Before: Capabilities: [2e8 v1] Physical Resizable BAR BAR 0: current size: 1MB BAR 1: current size: 1MB BAR 2: current size: 1MB BAR 3: current size: 1MB BAR 4: current size: 1MB BAR 5: current size: 1MB After: Capabilities: [2e8 v1] Physical Resizable BAR BAR 0: current size: 1MB, supported: 1MB BAR 1: current size: 1MB, supported: 1MB BAR 2: current size: 1MB, supported: 1MB BAR 3: current size: 1MB, supported: 1MB BAR 4: current size: 1MB, supported: 1MB BAR 5: current size: 1MB, supported: 1MB Fixes: fc9a77040b04 ("PCI: designware-ep: Configure Resizable BAR cap to advertise the smallest size") Link: https://lore.kernel.org/linux-pci/20240307111520.3303774-1-cassel@kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Cc: # 5.2 Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-designware-ep.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 4086a7818981a..506d6d061d4cd 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -669,8 +669,13 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; + /* + * PCIe r6.0, sec 7.8.6.2 require us to support at least one + * size in the range from 1 MB to 512 GB. Advertise support + * for 1 MB BAR size only. + */ for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) - dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, BIT(4)); } dw_pcie_setup(pci); -- GitLab From 4732ac1c23b5a9c755db7d4a613a757f93f61808 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 16 Feb 2024 12:22:40 -0800 Subject: [PATCH 1218/1418] PCI: hv: Fix ring buffer size calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b5ff74c1ef50fe08e384026875fec660fadfaedd ] For a physical PCI device that is passed through to a Hyper-V guest VM, current code specifies the VMBus ring buffer size as 4 pages. But this is an inappropriate dependency, since the amount of ring buffer space needed is unrelated to PAGE_SIZE. For example, on x86 the ring buffer size ends up as 16 Kbytes, while on ARM64 with 64 Kbyte pages, the ring size bloats to 256 Kbytes. The ring buffer for PCI pass-thru devices is used for only a few messages during device setup and removal, so any space above a few Kbytes is wasted. Fix this by declaring the ring buffer size to be a fixed 16 Kbytes. Furthermore, use the VMBUS_RING_SIZE() macro so that the ring buffer header is properly accounted for, and so the size is rounded up to a page boundary, using the page size for which the kernel is built. While w/64 Kbyte pages this results in a 64 Kbyte ring buffer header plus a 64 Kbyte ring buffer, that's the smallest possible with that page size. It's still 128 Kbytes better than the current code. Link: https://lore.kernel.org/linux-pci/20240216202240.251818-1-mhklinux@outlook.com Signed-off-by: Michael Kelley Signed-off-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Jarvinen Reviewed-by: Long Li Cc: # 5.15.x Signed-off-by: Sasha Levin --- drivers/pci/controller/pci-hyperv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9693bab59bf7c..b36cbc9136ae1 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -49,6 +49,7 @@ #include #include #include +#include #include /* @@ -465,7 +466,7 @@ struct pci_eject_response { u32 status; } __packed; -static int pci_ring_size = (4 * PAGE_SIZE); +static int pci_ring_size = VMBUS_RING_SIZE(SZ_16K); /* * Driver specific state. -- GitLab From 0f1e6cd8fb3c36ad0ea7f120c9c5445a1c23aa54 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 8 Jan 2023 17:44:24 +0200 Subject: [PATCH 1219/1418] vfio: Use GFP_KERNEL_ACCOUNT for userspace persistent allocations [ Upstream commit 0886196ca8810c5b1f5097b71c4bc0df40b10208 ] Use GFP_KERNEL_ACCOUNT for userspace persistent allocations. The GFP_KERNEL_ACCOUNT option lets the memory allocator know that this is untrusted allocation triggered from userspace and should be a subject of kmem accounting, and as such it is controlled by the cgroup mechanism. The way to find the relevant allocations was for example to look at the close_device function and trace back all the kfrees to their allocations. Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230108154427.32609-4-yishaih@nvidia.com Signed-off-by: Alex Williamson Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") Signed-off-by: Sasha Levin --- drivers/vfio/container.c | 2 +- drivers/vfio/pci/vfio_pci_config.c | 6 +++--- drivers/vfio/pci/vfio_pci_core.c | 7 ++++--- drivers/vfio/pci/vfio_pci_igd.c | 2 +- drivers/vfio/pci/vfio_pci_intrs.c | 10 ++++++---- drivers/vfio/pci/vfio_pci_rdwr.c | 2 +- drivers/vfio/virqfd.c | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d74164abbf401..ab9d8e3481f75 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -366,7 +366,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) { struct vfio_container *container; - container = kzalloc(sizeof(*container), GFP_KERNEL); + container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); if (!container) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 4a350421c5f62..523e0144c86fa 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1244,7 +1244,7 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos) if (vdev->msi_perm) return len; - vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL); + vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL_ACCOUNT); if (!vdev->msi_perm) return -ENOMEM; @@ -1731,11 +1731,11 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) * no requirements on the length of a capability, so the gap between * capabilities needs byte granularity. */ - map = kmalloc(pdev->cfg_size, GFP_KERNEL); + map = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!map) return -ENOMEM; - vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL); + vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!vconfig) { kfree(map); return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index e030c2120183e..f357fd157e1ed 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -141,7 +141,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) * of the exclusive page in case that hot-add * device's bar is assigned into it. */ - dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL); + dummy_res = + kzalloc(sizeof(*dummy_res), GFP_KERNEL_ACCOUNT); if (dummy_res == NULL) goto no_mmap; @@ -856,7 +857,7 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, region = krealloc(vdev->region, (vdev->num_regions + 1) * sizeof(*region), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!region) return -ENOMEM; @@ -1637,7 +1638,7 @@ static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, { struct vfio_pci_mmap_vma *mmap_vma; - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL); + mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); if (!mmap_vma) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 5e6ca59269548..dd70e2431bd74 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -180,7 +180,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) if (!addr || !(~addr)) return -ENODEV; - opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL); + opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL_ACCOUNT); if (!opregionvbt) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 40c3d7cf163f6..bffb0741518b9 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -177,7 +177,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev) if (!vdev->pdev->irq) return -ENODEV; - vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -216,7 +216,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) if (fd < 0) /* Disable only */ return 0; - vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", + vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); if (!vdev->ctx[0].name) return -ENOMEM; @@ -284,7 +284,8 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi if (!is_irq_none(vdev)) return -EINVAL; - vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), + GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -343,7 +344,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, if (fd < 0) return 0; - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", + vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT, + "vfio-msi%s[%d](%s)", msix ? "x" : "", vector, pci_name(pdev)); if (!vdev->ctx[vector].name) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e352a033b4aef..e27de61ac9fe7 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -470,7 +470,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, goto out_unlock; } - ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL); + ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT); if (!ioeventfd) { ret = -ENOMEM; goto out_unlock; diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 414e98d82b02e..a928c68df4763 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -115,7 +115,7 @@ int vfio_virqfd_enable(void *opaque, int ret = 0; __poll_t events; - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); + virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT); if (!virqfd) return -ENOMEM; -- GitLab From dcc6b99c61ec6ae87bab2aaff2cd98f8c47c754a Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 11 May 2023 08:44:28 -0700 Subject: [PATCH 1220/1418] vfio/pci: Consolidate irq cleanup on MSI/MSI-X disable [ Upstream commit a65f35cfd504e5135540939cffd4323083190b36 ] vfio_msi_disable() releases all previously allocated state associated with each interrupt before disabling MSI/MSI-X. vfio_msi_disable() iterates twice over the interrupt state: first directly with a for loop to do virqfd cleanup, followed by another for loop within vfio_msi_set_block() that removes the interrupt handler and its associated state using vfio_msi_set_vector_signal(). Simplify interrupt cleanup by iterating over allocated interrupts once. Signed-off-by: Reinette Chatre Reviewed-by: Kevin Tian Acked-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/837acb8cbe86a258a50da05e56a1f17c1a19abbe.1683740667.git.reinette.chatre@intel.com Signed-off-by: Alex Williamson Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_intrs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index bffb0741518b9..6a9c6a143cc3a 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -426,10 +426,9 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) for (i = 0; i < vdev->num_ctx; i++) { vfio_virqfd_disable(&vdev->ctx[i].unmask); vfio_virqfd_disable(&vdev->ctx[i].mask); + vfio_msi_set_vector_signal(vdev, i, -1, msix); } - vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); - cmd = vfio_pci_memory_lock_and_enable(vdev); pci_free_irq_vectors(pdev); vfio_pci_memory_unlock_and_restore(vdev, cmd); -- GitLab From f56ba2d484341c0397df896070c55800252461cb Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 11 May 2023 08:44:29 -0700 Subject: [PATCH 1221/1418] vfio/pci: Remove negative check on unsigned vector [ Upstream commit 6578ed85c7d63693669bfede01e0237d0e24211a ] User space provides the vector as an unsigned int that is checked early for validity (vfio_set_irqs_validate_and_prepare()). A later negative check of the provided vector is not necessary. Remove the negative check and ensure the type used for the vector is consistent as an unsigned int. Signed-off-by: Reinette Chatre Reviewed-by: Kevin Tian Acked-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/28521e1b0b091849952b0ecb8c118729fc8cdc4f.1683740667.git.reinette.chatre@intel.com Signed-off-by: Alex Williamson Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_intrs.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 6a9c6a143cc3a..258de57ef9564 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -317,14 +317,14 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi } static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, - int vector, int fd, bool msix) + unsigned int vector, int fd, bool msix) { struct pci_dev *pdev = vdev->pdev; struct eventfd_ctx *trigger; int irq, ret; u16 cmd; - if (vector < 0 || vector >= vdev->num_ctx) + if (vector >= vdev->num_ctx) return -EINVAL; irq = pci_irq_vector(pdev, vector); @@ -399,7 +399,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, unsigned count, int32_t *fds, bool msix) { - int i, j, ret = 0; + unsigned int i, j; + int ret = 0; if (start >= vdev->num_ctx || start + count > vdev->num_ctx) return -EINVAL; @@ -410,8 +411,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, } if (ret) { - for (--j; j >= (int)start; j--) - vfio_msi_set_vector_signal(vdev, j, -1, msix); + for (i = start; i < j; i++) + vfio_msi_set_vector_signal(vdev, i, -1, msix); } return ret; @@ -420,7 +421,7 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) { struct pci_dev *pdev = vdev->pdev; - int i; + unsigned int i; u16 cmd; for (i = 0; i < vdev->num_ctx; i++) { @@ -542,7 +543,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - int i; + unsigned int i; bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false; if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) { -- GitLab From 3fe0ac10bd117df847c93408a9d428a453cd60e5 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 8 Mar 2024 16:05:23 -0700 Subject: [PATCH 1222/1418] vfio/pci: Lock external INTx masking ops [ Upstream commit 810cd4bb53456d0503cc4e7934e063835152c1b7 ] Mask operations through config space changes to DisINTx may race INTx configuration changes via ioctl. Create wrappers that add locking for paths outside of the core interrupt code. In particular, irq_type is updated holding igate, therefore testing is_intx() requires holding igate. For example clearing DisINTx from config space can otherwise race changes of the interrupt configuration. This aligns interfaces which may trigger the INTx eventfd into two camps, one side serialized by igate and the other only enabled while INTx is configured. A subsequent patch introduces synchronization for the latter flows. Cc: Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reported-by: Reinette Chatre Reviewed-by: Kevin Tian Reviewed-by: Reinette Chatre Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-3-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_intrs.c | 34 +++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 258de57ef9564..8c8b04d858454 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -60,12 +60,14 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */ -bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; unsigned long flags; bool masked_changed = false; + lockdep_assert_held(&vdev->igate); + spin_lock_irqsave(&vdev->irqlock, flags); /* @@ -95,6 +97,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) return masked_changed; } +bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +{ + bool mask_changed; + + mutex_lock(&vdev->igate); + mask_changed = __vfio_pci_intx_mask(vdev); + mutex_unlock(&vdev->igate); + + return mask_changed; +} + /* * If this is triggered by an eventfd, we can't call eventfd_signal * or else we'll deadlock on the eventfd wait queue. Return >0 when @@ -137,12 +150,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) return ret; } -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) { + lockdep_assert_held(&vdev->igate); + if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) vfio_send_intx_eventfd(vdev, NULL); } +void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +{ + mutex_lock(&vdev->igate); + __vfio_pci_intx_unmask(vdev); + mutex_unlock(&vdev->igate); +} + static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { struct vfio_pci_core_device *vdev = dev_id; @@ -457,11 +479,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t unmask = *(uint8_t *)data; if (unmask) - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) @@ -484,11 +506,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t mask = *(uint8_t *)data; if (mask) - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { return -ENOTTY; /* XXX implement me */ } -- GitLab From 4ee09d4099bbed3c7d211102a3a5e7410ce87fb3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 8 Mar 2024 16:05:26 -0700 Subject: [PATCH 1223/1418] vfio/platform: Disable virqfds on cleanup [ Upstream commit fcdc0d3d40bc26c105acf8467f7d9018970944ae ] irqfds for mask and unmask that are not specifically disabled by the user are leaked. Remove any irqfds during cleanup Cc: Eric Auger Cc: Fixes: a7fa7c77cf15 ("vfio/platform: implement IRQ masking/unmasking via an eventfd") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-6-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/platform/vfio_platform_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index c5b09ec0a3c98..f2893f2fcaabd 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -321,8 +321,11 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) { int i; - for (i = 0; i < vdev->num_irqs; i++) + for (i = 0; i < vdev->num_irqs; i++) { + vfio_virqfd_disable(&vdev->irqs[i].mask); + vfio_virqfd_disable(&vdev->irqs[i].unmask); vfio_set_trigger(vdev, i, -1, NULL); + } vdev->num_irqs = 0; kfree(vdev->irqs); -- GitLab From 2952d0db18cfcbca0ddeffd69eea9a3a5f17de18 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Thu, 22 Feb 2024 10:58:21 +0100 Subject: [PATCH 1224/1418] ksmbd: retrieve number of blocks using vfs_getattr in set_file_allocation_info [ Upstream commit 34cd86b6632718b7df3999d96f51e63de41c5e4f ] Use vfs_getattr() to retrieve stat information, rather than make assumptions about how a filesystem fills inode structs. Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 66d25d0e34d8b..39fc078284c8e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5757,15 +5757,21 @@ static int set_file_allocation_info(struct ksmbd_work *work, loff_t alloc_blks; struct inode *inode; + struct kstat stat; int rc; if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); + if (rc) + return rc; + alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9; inode = file_inode(fp->filp); - if (alloc_blks > inode->i_blocks) { + if (alloc_blks > stat.blocks) { smb_break_all_levII_oplock(work, fp, 1); rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0, alloc_blks * 512); @@ -5773,7 +5779,7 @@ static int set_file_allocation_info(struct ksmbd_work *work, pr_err("vfs_fallocate is failed : %d\n", rc); return rc; } - } else if (alloc_blks < inode->i_blocks) { + } else if (alloc_blks < stat.blocks) { loff_t size; /* -- GitLab From 6615ef6e3166a7ac4b23f87bc4d2f42e9dce2d3e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Mar 2024 15:24:03 -0500 Subject: [PATCH 1225/1418] ring-buffer: Fix waking up ring buffer readers [ Upstream commit b3594573681b53316ec0365332681a30463edfd6 ] A task can wait on a ring buffer for when it fills up to a specific watermark. The writer will check the minimum watermark that waiters are waiting for and if the ring buffer is past that, it will wake up all the waiters. The waiters are in a wait loop, and will first check if a signal is pending and then check if the ring buffer is at the desired level where it should break out of the loop. If a file that uses a ring buffer closes, and there's threads waiting on the ring buffer, it needs to wake up those threads. To do this, a "wait_index" was used. Before entering the wait loop, the waiter will read the wait_index. On wakeup, it will check if the wait_index is different than when it entered the loop, and will exit the loop if it is. The waker will only need to update the wait_index before waking up the waiters. This had a couple of bugs. One trivial one and one broken by design. The trivial bug was that the waiter checked the wait_index after the schedule() call. It had to be checked between the prepare_to_wait() and the schedule() which it was not. The main bug is that the first check to set the default wait_index will always be outside the prepare_to_wait() and the schedule(). That's because the ring_buffer_wait() doesn't have enough context to know if it should break out of the loop. The loop itself is not needed, because all the callers to the ring_buffer_wait() also has their own loop, as the callers have a better sense of what the context is to decide whether to break out of the loop or not. Just have the ring_buffer_wait() block once, and if it gets woken up, exit the function and let the callers decide what to do next. Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNSRZfg@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.792933613@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice") Signed-off-by: Steven Rostedt (Google) Stable-dep-of: 761d9473e27f ("ring-buffer: Do not set shortest_full when full target is hit") Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 139 ++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 71 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e019a9278794f..3c4d62f499505 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -414,7 +414,6 @@ struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; - long wait_index; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; @@ -949,14 +948,40 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &cpu_buffer->irq_work; } - rbwork->wait_index++; - /* make sure the waiters see the new index */ - smp_wmb(); - /* This can be called in any context */ irq_work_queue(&rbwork->work); } +static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + bool ret = false; + + /* Reads of all CPUs always waits for any data */ + if (cpu == RING_BUFFER_ALL_CPUS) + return !ring_buffer_empty(buffer); + + cpu_buffer = buffer->buffers[cpu]; + + if (!ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + return true; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + ret = !pagebusy && full_hit(buffer, cpu, full); + + if (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full) + cpu_buffer->shortest_full = full; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + return ret; +} + /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on @@ -972,7 +997,6 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); struct rb_irq_work *work; - long wait_index; int ret = 0; /* @@ -991,81 +1015,54 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) work = &cpu_buffer->irq_work; } - wait_index = READ_ONCE(work->wait_index); - - while (true) { - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. - */ - if (full) - work->full_waiters_pending = true; - else - work->waiters_pending = true; - - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) - break; - - if (cpu != RING_BUFFER_ALL_CPUS && - !ring_buffer_empty_cpu(buffer, cpu)) { - unsigned long flags; - bool pagebusy; - bool done; - - if (!full) - break; - - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - done = !pagebusy && full_hit(buffer, cpu, full); + if (full) + prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); + else + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (done) - break; - } + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + if (full) + work->full_waiters_pending = true; + else + work->waiters_pending = true; - schedule(); + if (rb_watermark_hit(buffer, cpu, full)) + goto out; - /* Make sure to see the new wait index */ - smp_rmb(); - if (wait_index != work->wait_index) - break; + if (signal_pending(current)) { + ret = -EINTR; + goto out; } + schedule(); + out: if (full) finish_wait(&work->full_waiters, &wait); else finish_wait(&work->waiters, &wait); + if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current)) + ret = -EINTR; + return ret; } -- GitLab From 1df7dcfb8af3a89cca5f2068fa889c823925cc51 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 11:56:41 -0400 Subject: [PATCH 1226/1418] ring-buffer: Do not set shortest_full when full target is hit [ Upstream commit 761d9473e27f0c8782895013a3e7b52a37c8bcfc ] The rb_watermark_hit() checks if the amount of data in the ring buffer is above the percentage level passed in by the "full" variable. If it is, it returns true. But it also sets the "shortest_full" field of the cpu_buffer that informs writers that it needs to call the irq_work if the amount of data on the ring buffer is above the requested amount. The rb_watermark_hit() always sets the shortest_full even if the amount in the ring buffer is what it wants. As it is not going to wait, because it has what it wants, there's no reason to set shortest_full. Link: https://lore.kernel.org/linux-trace-kernel/20240312115641.6aa8ba08@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Fixes: 42fb0a1e84ff5 ("tracing/ring-buffer: Have polling block on watermark") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3c4d62f499505..c934839f625df 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -974,9 +974,10 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; ret = !pagebusy && full_hit(buffer, cpu, full); - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; + if (!ret && (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full)) { + cpu_buffer->shortest_full = full; + } raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } return ret; -- GitLab From 6c2f0e055085ce6d84ae8f3e9dd3de519d96d5df Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Mar 2024 15:24:04 -0500 Subject: [PATCH 1227/1418] ring-buffer: Fix resetting of shortest_full [ Upstream commit 68282dd930ea38b068ce2c109d12405f40df3f93 ] The "shortest_full" variable is used to keep track of the waiter that is waiting for the smallest amount on the ring buffer before being woken up. When a tasks waits on the ring buffer, it passes in a "full" value that is a percentage. 0 means wake up on any data. 1-100 means wake up from 1% to 100% full buffer. As all waiters are on the same wait queue, the wake up happens for the waiter with the smallest percentage. The problem is that the smallest_full on the cpu_buffer that stores the smallest amount doesn't get reset when all the waiters are woken up. It does get reset when the ring buffer is reset (echo > /sys/kernel/tracing/trace). This means that tasks may be woken up more often then when they want to be. Instead, have the shortest_full field get reset just before waking up all the tasks. If the tasks wait again, they will update the shortest_full before sleeping. Also add locking around setting of shortest_full in the poll logic, and change "work" to "rbwork" to match the variable name for rb_irq_work structures that are used in other places. Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.948914369@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Signed-off-by: Steven Rostedt (Google) Stable-dep-of: 8145f1c35fa6 ("ring-buffer: Fix full_waiters_pending in poll") Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c934839f625df..3b50e17e2c9ab 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -907,8 +907,19 @@ static void rb_wake_up_waiters(struct irq_work *work) wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { + /* Only cpu_buffer sets the above flags */ + struct ring_buffer_per_cpu *cpu_buffer = + container_of(rbwork, struct ring_buffer_per_cpu, irq_work); + + /* Called from interrupt context */ + raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; + + /* Waking up all waiters, they will reset the shortest full */ + cpu_buffer->shortest_full = 0; + raw_spin_unlock(&cpu_buffer->reader_lock); + wake_up_all(&rbwork->full_waiters); } } @@ -1086,28 +1097,33 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; - struct rb_irq_work *work; + struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; + rbwork = &cpu_buffer->irq_work; } if (full) { - poll_wait(filp, &work->full_waiters, poll_table); - work->full_waiters_pending = true; + unsigned long flags; + + poll_wait(filp, &rbwork->full_waiters, poll_table); + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + rbwork->full_waiters_pending = true; if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } else { - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; } /* -- GitLab From 6e0f7e6fb1c40c4df421ece7fabdbbd15cb5c326 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 09:19:20 -0400 Subject: [PATCH 1228/1418] ring-buffer: Fix full_waiters_pending in poll [ Upstream commit 8145f1c35fa648da662078efab299c4467b85ad5 ] If a reader of the ring buffer is doing a poll, and waiting for the ring buffer to hit a specific watermark, there could be a case where it gets into an infinite ping-pong loop. The poll code has: rbwork->full_waiters_pending = true; if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; The writer will see full_waiters_pending and check if the ring buffer is filled over the percentage of the shortest_full value. If it is, it calls an irq_work to wake up all the waiters. But the code could get into a circular loop: CPU 0 CPU 1 ----- ----- [ Poll ] [ shortest_full = 0 ] rbwork->full_waiters_pending = true; if (rbwork->full_waiters_pending && [ buffer percent ] > shortest_full) { rbwork->wakeup_full = true; [ queue_irqwork ] cpu_buffer->shortest_full = full; [ IRQ work ] if (rbwork->wakeup_full) { cpu_buffer->shortest_full = 0; wakeup poll waiters; [woken] if ([ buffer percent ] > full) break; rbwork->full_waiters_pending = true; if (rbwork->full_waiters_pending && [ buffer percent ] > shortest_full) { rbwork->wakeup_full = true; [ queue_irqwork ] cpu_buffer->shortest_full = full; [ IRQ work ] if (rbwork->wakeup_full) { cpu_buffer->shortest_full = 0; wakeup poll waiters; [woken] [ Wash, rinse, repeat! ] In the poll, the shortest_full needs to be set before the full_pending_waiters, as once that is set, the writer will compare the current shortest_full (which is incorrect) to decide to call the irq_work, which will reset the shortest_full (expecting the readers to update it). Also move the setting of full_waiters_pending after the check if the ring buffer has the required percentage filled. There's no reason to tell the writer to wake up waiters if there are no waiters. Link: https://lore.kernel.org/linux-trace-kernel/20240312131952.630922155@goodmis.org Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 42fb0a1e84ff5 ("tracing/ring-buffer: Have polling block on watermark") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3b50e17e2c9ab..e07f45d1890d3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1116,16 +1116,32 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, poll_wait(filp, &rbwork->full_waiters, poll_table); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - rbwork->full_waiters_pending = true; if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - } else { - poll_wait(filp, &rbwork->waiters, poll_table); - rbwork->waiters_pending = true; + if (full_hit(buffer, cpu, full)) + return EPOLLIN | EPOLLRDNORM; + /* + * Only allow full_waiters_pending update to be seen after + * the shortest_full is set. If the writer sees the + * full_waiters_pending flag set, it will compare the + * amount in the ring buffer to shortest_full. If the amount + * in the ring buffer is greater than the shortest_full + * percent, it will call the irq_work handler to wake up + * this list. The irq_handler will reset shortest_full + * back to zero. That's done under the reader_lock, but + * the below smp_mb() makes sure that the update to + * full_waiters_pending doesn't leak up into the above. + */ + smp_mb(); + rbwork->full_waiters_pending = true; + return 0; } + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -1141,9 +1157,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); - if (full) - return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; -- GitLab From b1cf18e5bd871498be689c8c471b66e5e0d08655 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 08:15:07 -0400 Subject: [PATCH 1229/1418] ring-buffer: Use wait_event_interruptible() in ring_buffer_wait() [ Upstream commit 7af9ded0c2caac0a95f33df5cb04706b0f502588 ] Convert ring_buffer_wait() over to wait_event_interruptible(). The default condition is to execute the wait loop inside __wait_event() just once. This does not change the ring_buffer_wait() prototype yet, but restructures the code so that it can take a "cond" and "data" parameter and will call wait_event_interruptible() with a helper function as the condition. The helper function (rb_wait_cond) takes the cond function and data parameters. It will first check if the buffer hit the watermark defined by the "full" parameter and then call the passed in condition parameter. If either are true, it returns true. If rb_wait_cond() does not return true, it will set the appropriate "waiters_pending" flag and returns false. Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wgsNgewHFxZAJiAQznwPMqEtQmi1waeS2O1v6L4c_Um5A@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240312121703.399598519@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 116 +++++++++++++++++++++--------------- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3c7d295746f67..3e7bfc0f65aee 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -98,6 +98,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e07f45d1890d3..431a922e5c89e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -994,43 +994,15 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) return ret; } -/** - * ring_buffer_wait - wait for input to the ring buffer - * @buffer: buffer to wait on - * @cpu: the cpu buffer to wait on - * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS - * - * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon - * as data is added to any of the @buffer's cpu buffers. Otherwise - * it will wait for data to be added to a specific cpu buffer. - */ -int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +static inline bool +rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, + int cpu, int full, ring_buffer_cond_fn cond, void *data) { - struct ring_buffer_per_cpu *cpu_buffer; - DEFINE_WAIT(wait); - struct rb_irq_work *work; - int ret = 0; - - /* - * Depending on what the caller is waiting for, either any - * data in any cpu buffer, or a specific buffer, put the - * caller on the appropriate wait queue. - */ - if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; - /* Full only makes sense on per cpu reads */ - full = 0; - } else { - if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -ENODEV; - cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; - } + if (rb_watermark_hit(buffer, cpu, full)) + return true; - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); + if (cond(data)) + return true; /* * The events can happen in critical sections where @@ -1053,27 +1025,75 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * a task has been queued. It's OK for spurious wake ups. */ if (full) - work->full_waiters_pending = true; + rbwork->full_waiters_pending = true; else - work->waiters_pending = true; + rbwork->waiters_pending = true; - if (rb_watermark_hit(buffer, cpu, full)) - goto out; + return false; +} - if (signal_pending(current)) { - ret = -EINTR; - goto out; +/* + * The default wait condition for ring_buffer_wait() is to just to exit the + * wait loop the first time it is woken up. + */ +static bool rb_wait_once(void *data) +{ + long *once = data; + + /* wait_event() actually calls this twice before scheduling*/ + if (*once > 1) + return true; + + (*once)++; + return false; +} + +/** + * ring_buffer_wait - wait for input to the ring buffer + * @buffer: buffer to wait on + * @cpu: the cpu buffer to wait on + * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS + * + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon + * as data is added to any of the @buffer's cpu buffers. Otherwise + * it will wait for data to be added to a specific cpu buffer. + */ +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + struct wait_queue_head *waitq; + ring_buffer_cond_fn cond; + struct rb_irq_work *rbwork; + void *data; + long once = 0; + int ret = 0; + + cond = rb_wait_once; + data = &once; + + /* + * Depending on what the caller is waiting for, either any + * data in any cpu buffer, or a specific buffer, put the + * caller on the appropriate wait queue. + */ + if (cpu == RING_BUFFER_ALL_CPUS) { + rbwork = &buffer->irq_work; + /* Full only makes sense on per cpu reads */ + full = 0; + } else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -ENODEV; + cpu_buffer = buffer->buffers[cpu]; + rbwork = &cpu_buffer->irq_work; } - schedule(); - out: if (full) - finish_wait(&work->full_waiters, &wait); + waitq = &rbwork->full_waiters; else - finish_wait(&work->waiters, &wait); + waitq = &rbwork->waiters; - if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current)) - ret = -EINTR; + ret = wait_event_interruptible((*waitq), + rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } -- GitLab From a62168653774c36398d65846a98034436ee66d03 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 11 Mar 2024 12:38:29 -0400 Subject: [PATCH 1230/1418] soc: fsl: qbman: Always disable interrupts when taking cgr_lock [ Upstream commit 584c2a9184a33a40fceee838f856de3cffa19be3 ] smp_call_function_single disables IRQs when executing the callback. To prevent deadlocks, we must disable IRQs when taking cgr_lock elsewhere. This is already done by qman_update_cgr and qman_delete_cgr; fix the other lockers. Fixes: 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()") CC: stable@vger.kernel.org Signed-off-by: Sean Anderson Reviewed-by: Camelia Groza Tested-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 739e4eee6b75c..1bf1f1ea67f00 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1456,11 +1456,11 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock(&p->cgr_lock); + spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2440,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock(&p->cgr_lock); + spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2477,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } -- GitLab From d6b5aac451c9cc12e43ab7308e0e2ddc52c62c14 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 11 Mar 2024 12:38:30 -0400 Subject: [PATCH 1231/1418] soc: fsl: qbman: Use raw spinlock for cgr_lock [ Upstream commit fbec4e7fed89b579f2483041fabf9650fb0dd6bc ] smp_call_function always runs its callback in hard IRQ context, even on PREEMPT_RT, where spinlocks can sleep. So we need to use a raw spinlock for cgr_lock to ensure we aren't waiting on a sleeping task. Although this bug has existed for a while, it was not apparent until commit ef2a8d5478b9 ("net: dpaa: Adjust queue depth on rate change") which invokes smp_call_function_single via qman_update_cgr_safe every time a link goes up or down. Fixes: 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()") CC: stable@vger.kernel.org Reported-by: Vladimir Oltean Closes: https://lore.kernel.org/all/20230323153935.nofnjucqjqnz34ej@skbuf/ Reported-by: Steffen Trumtrar Closes: https://lore.kernel.org/linux-arm-kernel/87wmsyvclu.fsf@pengutronix.de/ Signed-off-by: Sean Anderson Reviewed-by: Camelia Groza Tested-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 1bf1f1ea67f00..7e9074519ad22 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -991,7 +991,7 @@ struct qman_portal { /* linked-list of CSCN handlers. */ struct list_head cgr_cbs; /* list lock */ - spinlock_t cgr_lock; + raw_spinlock_t cgr_lock; struct work_struct congestion_work; struct work_struct mr_work; char irqname[MAX_IRQNAME]; @@ -1281,7 +1281,7 @@ static int qman_create_portal(struct qman_portal *portal, /* if the given mask is NULL, assume all CGRs can be seen */ qman_cgrs_fill(&portal->cgrs[0]); INIT_LIST_HEAD(&portal->cgr_cbs); - spin_lock_init(&portal->cgr_lock); + raw_spin_lock_init(&portal->cgr_lock); INIT_WORK(&portal->congestion_work, qm_congestion_task); INIT_WORK(&portal->mr_work, qm_mr_process_task); portal->bits = 0; @@ -1456,11 +1456,14 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock_irq(&p->cgr_lock); + /* + * FIXME: QM_MCR_TIMEOUT is 10ms, which is too long for a raw spinlock! + */ + raw_spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1479,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2443,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock_irq(&p->cgr_lock); + raw_spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2480,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } @@ -2512,7 +2515,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) return -EINVAL; memset(&local_opts, 0, sizeof(struct qm_mcc_initcgr)); - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); list_del(&cgr->node); /* * If there are no other CGR objects for this CGRID in the list, @@ -2537,7 +2540,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) /* add back to the list */ list_add(&cgr->node, &p->cgr_cbs); release_lock: - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } @@ -2577,9 +2580,9 @@ static int qman_update_cgr(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts) if (!p) return -EINVAL; - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); ret = qm_modify_cgr(cgr, 0, opts); - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } -- GitLab From b7f6c3630eb3f103115ab0d7613588064f665d0d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 29 Feb 2024 15:20:09 +0100 Subject: [PATCH 1232/1418] s390/zcrypt: fix reference counting on zcrypt card objects [ Upstream commit 50ed48c80fecbe17218afed4f8bed005c802976c ] Tests with hot-plugging crytpo cards on KVM guests with debug kernel build revealed an use after free for the load field of the struct zcrypt_card. The reason was an incorrect reference handling of the zcrypt card object which could lead to a free of the zcrypt card object while it was still in use. This is an example of the slab message: kernel: 0x00000000885a7512-0x00000000885a7513 @offset=1298. First byte 0x68 instead of 0x6b kernel: Allocated in zcrypt_card_alloc+0x36/0x70 [zcrypt] age=18046 cpu=3 pid=43 kernel: kmalloc_trace+0x3f2/0x470 kernel: zcrypt_card_alloc+0x36/0x70 [zcrypt] kernel: zcrypt_cex4_card_probe+0x26/0x380 [zcrypt_cex4] kernel: ap_device_probe+0x15c/0x290 kernel: really_probe+0xd2/0x468 kernel: driver_probe_device+0x40/0xf0 kernel: __device_attach_driver+0xc0/0x140 kernel: bus_for_each_drv+0x8c/0xd0 kernel: __device_attach+0x114/0x198 kernel: bus_probe_device+0xb4/0xc8 kernel: device_add+0x4d2/0x6e0 kernel: ap_scan_adapter+0x3d0/0x7c0 kernel: ap_scan_bus+0x5a/0x3b0 kernel: ap_scan_bus_wq_callback+0x40/0x60 kernel: process_one_work+0x26e/0x620 kernel: worker_thread+0x21c/0x440 kernel: Freed in zcrypt_card_put+0x54/0x80 [zcrypt] age=9024 cpu=3 pid=43 kernel: kfree+0x37e/0x418 kernel: zcrypt_card_put+0x54/0x80 [zcrypt] kernel: ap_device_remove+0x4c/0xe0 kernel: device_release_driver_internal+0x1c4/0x270 kernel: bus_remove_device+0x100/0x188 kernel: device_del+0x164/0x3c0 kernel: device_unregister+0x30/0x90 kernel: ap_scan_adapter+0xc8/0x7c0 kernel: ap_scan_bus+0x5a/0x3b0 kernel: ap_scan_bus_wq_callback+0x40/0x60 kernel: process_one_work+0x26e/0x620 kernel: worker_thread+0x21c/0x440 kernel: kthread+0x150/0x168 kernel: __ret_from_fork+0x3c/0x58 kernel: ret_from_fork+0xa/0x30 kernel: Slab 0x00000372022169c0 objects=20 used=18 fp=0x00000000885a7c88 flags=0x3ffff00000000a00(workingset|slab|node=0|zone=1|lastcpupid=0x1ffff) kernel: Object 0x00000000885a74b8 @offset=1208 fp=0x00000000885a7c88 kernel: Redzone 00000000885a74b0: bb bb bb bb bb bb bb bb ........ kernel: Object 00000000885a74b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74f8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a7508: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 68 4b 6b 6b 6b a5 kkkkkkkkkkhKkkk. kernel: Redzone 00000000885a7518: bb bb bb bb bb bb bb bb ........ kernel: Padding 00000000885a756c: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZ kernel: CPU: 0 PID: 387 Comm: systemd-udevd Not tainted 6.8.0-HF #2 kernel: Hardware name: IBM 3931 A01 704 (KVM/Linux) kernel: Call Trace: kernel: [<00000000ca5ab5b8>] dump_stack_lvl+0x90/0x120 kernel: [<00000000c99d78bc>] check_bytes_and_report+0x114/0x140 kernel: [<00000000c99d53cc>] check_object+0x334/0x3f8 kernel: [<00000000c99d820c>] alloc_debug_processing+0xc4/0x1f8 kernel: [<00000000c99d852e>] get_partial_node.part.0+0x1ee/0x3e0 kernel: [<00000000c99d94ec>] ___slab_alloc+0xaf4/0x13c8 kernel: [<00000000c99d9e38>] __slab_alloc.constprop.0+0x78/0xb8 kernel: [<00000000c99dc8dc>] __kmalloc+0x434/0x590 kernel: [<00000000c9b4c0ce>] ext4_htree_store_dirent+0x4e/0x1c0 kernel: [<00000000c9b908a2>] htree_dirblock_to_tree+0x17a/0x3f0 kernel: [<00000000c9b919dc>] ext4_htree_fill_tree+0x134/0x400 kernel: [<00000000c9b4b3d0>] ext4_dx_readdir+0x160/0x2f0 kernel: [<00000000c9b4bedc>] ext4_readdir+0x5f4/0x760 kernel: [<00000000c9a7efc4>] iterate_dir+0xb4/0x280 kernel: [<00000000c9a7f1ea>] __do_sys_getdents64+0x5a/0x120 kernel: [<00000000ca5d6946>] __do_syscall+0x256/0x310 kernel: [<00000000ca5eea10>] system_call+0x70/0x98 kernel: INFO: lockdep is turned off. kernel: FIX kmalloc-96: Restoring Poison 0x00000000885a7512-0x00000000885a7513=0x6b kernel: FIX kmalloc-96: Marking all objects used The fix is simple: Before use of the queue not only the queue object but also the card object needs to increase it's reference count with a call to zcrypt_card_get(). Similar after use of the queue not only the queue but also the card object's reference count is decreased with zcrypt_card_put(). Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- drivers/s390/crypto/zcrypt_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 28e34d155334b..6f44963d34bbf 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -617,6 +617,7 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, { if (!zq || !try_module_get(zq->queue->ap_dev.device.driver->owner)) return NULL; + zcrypt_card_get(zc); zcrypt_queue_get(zq); get_device(&zq->queue->ap_dev.device); atomic_add(weight, &zc->load); @@ -636,6 +637,7 @@ static inline void zcrypt_drop_queue(struct zcrypt_card *zc, atomic_sub(weight, &zq->load); put_device(&zq->queue->ap_dev.device); zcrypt_queue_put(zq); + zcrypt_card_put(zc); module_put(mod); } -- GitLab From cb676955d1acb065b06d92219d1b1fb8231823fe Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:39 +0200 Subject: [PATCH 1233/1418] drm/probe-helper: warn about negative .get_modes() [ Upstream commit 7af03e688792293ba33149fb8df619a8dff90e80 ] The .get_modes() callback is supposed to return the number of modes, never a negative error code. If a negative value is returned, it'll just be interpreted as a negative count, and added to previous calculations. Document the rules, but handle the negative values gracefully with an error message. Cc: stable@vger.kernel.org Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/50208c866facc33226a3c77b82bb96aeef8ef310.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_probe_helper.c | 7 +++++++ include/drm/drm_modeset_helper_vtables.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 3b968ad187cf3..52dbaf74fe164 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -362,6 +362,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector) count = connector_funcs->get_modes(connector); + /* The .get_modes() callback should not return negative values. */ + if (count < 0) { + drm_err(connector->dev, ".get_modes() returned %pe\n", + ERR_PTR(count)); + count = 0; + } + /* * Fallback for when DDC probe failed in drm_get_edid() and thus skipped * override/firmware EDID. diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index fafa70ac1337f..6f19cf5c210e5 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -896,7 +896,8 @@ struct drm_connector_helper_funcs { * * RETURNS: * - * The number of modes added by calling drm_mode_probed_add(). + * The number of modes added by calling drm_mode_probed_add(). Return 0 + * on failures (no modes) instead of negative error codes. */ int (*get_modes)(struct drm_connector *connector); -- GitLab From 191df89d8fb0a6b5b4165912ff6b6c7a8138323c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:40 +0200 Subject: [PATCH 1234/1418] drm/panel: do not return negative error codes from drm_panel_get_modes() [ Upstream commit fc4e97726530241d96dd7db72eb65979217422c9 ] None of the callers of drm_panel_get_modes() expect it to return negative error codes. Either they propagate the return value in their struct drm_connector_helper_funcs .get_modes() hook (which is also not supposed to return negative codes), or add it to other counts leading to bogus values. On the other hand, many of the struct drm_panel_funcs .get_modes() hooks do return negative error codes, so handle them gracefully instead of propagating further. Return 0 for no modes, whatever the reason. Cc: Neil Armstrong Cc: Jessica Zhang Cc: Sam Ravnborg Cc: stable@vger.kernel.org Reviewed-by: Neil Armstrong Reviewed-by: Jessica Zhang Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/79f559b72d8c493940417304e222a4b04dfa19c4.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index f634371c717a8..7fd3de89ed079 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -207,19 +207,24 @@ EXPORT_SYMBOL(drm_panel_disable); * The modes probed from the panel are automatically added to the connector * that the panel is attached to. * - * Return: The number of modes available from the panel on success or a - * negative error code on failure. + * Return: The number of modes available from the panel on success, or 0 on + * failure (no modes). */ int drm_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { if (!panel) - return -EINVAL; + return 0; - if (panel->funcs && panel->funcs->get_modes) - return panel->funcs->get_modes(panel, connector); + if (panel->funcs && panel->funcs->get_modes) { + int num; - return -EOPNOTSUPP; + num = panel->funcs->get_modes(panel, connector); + if (num > 0) + return num; + } + + return 0; } EXPORT_SYMBOL(drm_panel_get_modes); -- GitLab From 8f914db6fe252c5e78a9b8b03adc1b0a33aec25d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:41 +0200 Subject: [PATCH 1235/1418] drm/exynos: do not return negative values from .get_modes() [ Upstream commit 13d5b040363c7ec0ac29c2de9cf661a24a8aa531 ] The .get_modes() hooks aren't supposed to return negative error codes. Return 0 for no modes, whatever the reason. Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: stable@vger.kernel.org Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/d8665f620d9c252aa7d5a4811ff6b16e773903a2.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 4 ++-- drivers/gpu/drm/exynos/exynos_hdmi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index f5e1adfcaa514..fb941a8c99f0f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -316,14 +316,14 @@ static int vidi_get_modes(struct drm_connector *connector) */ if (!ctx->raw_edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "raw_edid is null.\n"); - return -EFAULT; + return 0; } edid_len = (1 + ctx->raw_edid->extensions) * EDID_LENGTH; edid = kmemdup(ctx->raw_edid, edid_len, GFP_KERNEL); if (!edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "failed to allocate edid\n"); - return -ENOMEM; + return 0; } drm_connector_update_edid_property(connector, edid); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 1a7194a653ae5..be2d9cbaaef2e 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) int ret; if (!hdata->ddc_adpt) - return -ENODEV; + return 0; edid = drm_get_edid(connector, hdata->ddc_adpt); if (!edid) - return -ENODEV; + return 0; hdata->dvi_mode = !connector->display_info.is_hdmi; DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", -- GitLab From ae696b7c00ef30285228fb2b001f59bed711636e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:43 +0200 Subject: [PATCH 1236/1418] drm/imx/ipuv3: do not return negative values from .get_modes() [ Upstream commit c2da9ada64962fcd2e6395ed9987b9874ea032d3 ] The .get_modes() hooks aren't supposed to return negative error codes. Return 0 for no modes, whatever the reason. Cc: Philipp Zabel Cc: stable@vger.kernel.org Acked-by: Philipp Zabel Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/311f6eec96d47949b16a670529f4d89fcd97aefa.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/parallel-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 06723b2e9b847..64b6bc2de873e 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -72,14 +72,14 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) int ret; if (!mode) - return -EINVAL; + return 0; ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); if (ret) { drm_mode_destroy(connector->dev, mode); - return ret; + return 0; } drm_mode_copy(mode, &imxpd->mode); -- GitLab From fcf7345280af593ebd41c58a3ad0675acdad5119 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:44 +0200 Subject: [PATCH 1237/1418] drm/vc4: hdmi: do not return negative values from .get_modes() [ Upstream commit abf493988e380f25242c1023275c68bd3579c9ce ] The .get_modes() hooks aren't supposed to return negative error codes. Return 0 for no modes, whatever the reason. Cc: Maxime Ripard Cc: stable@vger.kernel.org Acked-by: Maxime Ripard Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/dcda6d4003e2c6192987916b35c7304732800e08.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index ea2eaf6032caa..f696818913499 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -497,7 +497,7 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) edid = drm_get_edid(connector, vc4_hdmi->ddc); cec_s_phys_addr_from_edid(vc4_hdmi->cec_adap, edid); if (!edid) - return -ENODEV; + return 0; drm_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); -- GitLab From 50e9f82b1c1dc125c919d2f0d6a2cb77efe75fbb Mon Sep 17 00:00:00 2001 From: Qiang Zhang Date: Tue, 12 Mar 2024 16:04:23 +0800 Subject: [PATCH 1238/1418] memtest: use {READ,WRITE}_ONCE in memory scanning [ Upstream commit 82634d7e24271698e50a3ec811e5f50de790a65f ] memtest failed to find bad memory when compiled with clang. So use {WRITE,READ}_ONCE to access memory to avoid compiler over optimization. Link: https://lkml.kernel.org/r/20240312080422.691222-1-qiang4.zhang@intel.com Signed-off-by: Qiang Zhang Cc: Bill Wendling Cc: Justin Stitt Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/memtest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memtest.c b/mm/memtest.c index f53ace709ccd8..d407373f225b4 100644 --- a/mm/memtest.c +++ b/mm/memtest.c @@ -46,10 +46,10 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size last_bad = 0; for (p = start; p < end; p++) - *p = pattern; + WRITE_ONCE(*p, pattern); for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) + if (READ_ONCE(*p) == pattern) continue; if (start_phys_aligned == last_bad + incr) { last_bad += incr; -- GitLab From af4b1a5aa00bd947bce1d7074a9b899a53bc37dd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 Mar 2024 14:42:18 -0700 Subject: [PATCH 1239/1418] Revert "block/mq-deadline: use correct way to throttling write requests" [ Upstream commit 256aab46e31683d76d45ccbedc287b4d3f3e322b ] The code "max(1U, 3 * (1U << shift) / 4)" comes from the Kyber I/O scheduler. The Kyber I/O scheduler maintains one internal queue per hwq and hence derives its async_depth from the number of hwq tags. Using this approach for the mq-deadline scheduler is wrong since the mq-deadline scheduler maintains one internal queue for all hwqs combined. Hence this revert. Cc: stable@vger.kernel.org Cc: Damien Le Moal Cc: Harshit Mogalapalli Cc: Zhiguo Niu Fixes: d47f9717e5cf ("block/mq-deadline: use correct way to throttling write requests") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240313214218.1736147-1-bvanassche@acm.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/mq-deadline.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 55e26065c2e27..f10c2a0d18d41 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -622,9 +622,8 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; - unsigned int shift = tags->bitmap_tags.sb.shift; - dd->async_depth = max(1U, 3 * (1U << shift) / 4); + dd->async_depth = max(1UL, 3 * q->nr_requests / 4); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); } -- GitLab From 1f5124c74aaafdb6b20adc8d479d4c7cd225eb70 Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 13 Mar 2024 20:26:19 +0900 Subject: [PATCH 1240/1418] f2fs: mark inode dirty for FI_ATOMIC_COMMITTED flag [ Upstream commit 4bf78322346f6320313683dc9464e5423423ad5c ] In f2fs_update_inode, i_size of the atomic file isn't updated until FI_ATOMIC_COMMITTED flag is set. When committing atomic write right after the writeback of the inode, i_size of the raw inode will not be updated. It can cause the atomicity corruption due to a mismatch between old file size and new data. To prevent the problem, let's mark inode dirty for FI_ATOMIC_COMMITTED Atomic write thread Writeback thread __writeback_single_inode write_inode f2fs_update_inode - skip i_size update f2fs_ioc_commit_atomic_write f2fs_commit_atomic_write set_inode_flag(inode, FI_ATOMIC_COMMITTED) f2fs_do_sync_file f2fs_fsync_node_pages - skip f2fs_update_inode since the inode is clean Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5ae1c4aa3ae92..b54d681c6457d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3000,6 +3000,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: + case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } -- GitLab From f5bc133ef62e0a355249e9c40657832ef76734fe Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 13 Mar 2024 20:26:20 +0900 Subject: [PATCH 1241/1418] f2fs: truncate page cache before clearing flags when aborting atomic write [ Upstream commit 74b0ebcbdde4c7fe23c979e4cfc2fdbf349c39a3 ] In f2fs_do_write_data_page, FI_ATOMIC_FILE flag selects the target inode between the original inode and COW inode. When aborting atomic write and writeback occur simultaneously, invalid data can be written to original inode if the FI_ATOMIC_FILE flag is cleared meanwhile. To prevent the problem, let's truncate all pages before clearing the flag Atomic write thread Writeback thread f2fs_abort_atomic_write clear_inode_flag(inode, FI_ATOMIC_FILE) __writeback_single_inode do_writepages f2fs_do_write_data_page - use dn of original inode truncate_inode_pages_final Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aa1ba2fdfe00d..205216c1db91f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,6 +192,9 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; + if (clean) + truncate_inode_pages_final(inode->i_mapping); + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -200,7 +203,6 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) F2FS_I(inode)->atomic_write_task = NULL; if (clean) { - truncate_inode_pages_final(inode->i_mapping); f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } -- GitLab From 46b832e09d43b394ac0f6d9485d2b1a06593f0b7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 13 Mar 2024 19:58:26 +0900 Subject: [PATCH 1242/1418] nilfs2: fix failure to detect DAT corruption in btree and direct mappings [ Upstream commit f2f26b4a84a0ef41791bd2d70861c8eac748f4ba ] Patch series "nilfs2: fix kernel bug at submit_bh_wbc()". This resolves a kernel BUG reported by syzbot. Since there are two flaws involved, I've made each one a separate patch. The first patch alone resolves the syzbot-reported bug, but I think both fixes should be sent to stable, so I've tagged them as such. This patch (of 2): Syzbot has reported a kernel bug in submit_bh_wbc() when writing file data to a nilfs2 file system whose metadata is corrupted. There are two flaws involved in this issue. The first flaw is that when nilfs_get_block() locates a data block using btree or direct mapping, if the disk address translation routine nilfs_dat_translate() fails with internal code -ENOENT due to DAT metadata corruption, it can be passed back to nilfs_get_block(). This causes nilfs_get_block() to misidentify an existing block as non-existent, causing both data block lookup and insertion to fail inconsistently. The second flaw is that nilfs_get_block() returns a successful status in this inconsistent state. This causes the caller __block_write_begin_int() or others to request a read even though the buffer is not mapped, resulting in a BUG_ON check for the BH_Mapped flag in submit_bh_wbc() failing. This fixes the first issue by changing the return value to code -EINVAL when a conversion using DAT fails with code -ENOENT, avoiding the conflicting condition that leads to the kernel bug described above. Here, code -EINVAL indicates that metadata corruption was detected during the block lookup, which will be properly handled as a file system error and converted to -EIO when passing through the nilfs2 bmap layer. Link: https://lkml.kernel.org/r/20240313105827.5296-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240313105827.5296-2-konishi.ryusuke@gmail.com Fixes: c3a7abf06ce7 ("nilfs2: support contiguous lookup of blocks") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+cfed5b56649bddf80d6e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=cfed5b56649bddf80d6e Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/btree.c | 9 +++++++-- fs/nilfs2/direct.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 40ce92a332fe7..146640f0607a3 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr = blocknr; } cnt = 1; @@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) @@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, out: nilfs_btree_free_path(path); return ret; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index a35f2795b2422..8f802f7b0840b 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr = blocknr; } @@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt) @@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, } *ptrp = ptr; return cnt; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + return ret; } static __u64 -- GitLab From 192e9f9078c96be30b31c4b44d6294b24520fce5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 13 Mar 2024 19:58:27 +0900 Subject: [PATCH 1243/1418] nilfs2: prevent kernel bug at submit_bh_wbc() [ Upstream commit 269cdf353b5bdd15f1a079671b0f889113865f20 ] Fix a bug where nilfs_get_block() returns a successful status when searching and inserting the specified block both fail inconsistently. If this inconsistent behavior is not due to a previously fixed bug, then an unexpected race is occurring, so return a temporary error -EAGAIN instead. This prevents callers such as __block_write_begin_int() from requesting a read into a buffer that is not mapped, which would cause the BUG_ON check for the BH_Mapped flag in submit_bh_wbc() to fail. Link: https://lkml.kernel.org/r/20240313105827.5296-3-konishi.ryusuke@gmail.com Fixes: 1f5abe7e7dbc ("nilfs2: replace BUG_ON and BUG calls triggerable from ioctl") Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index f625872321cca..8eb4288d46fe0 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -112,7 +112,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", __func__, inode->i_ino, (unsigned long long)blkoff); - err = 0; + err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; -- GitLab From bbec4e4d84c3a9005ccacc520837c4f650965654 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Fri, 1 Mar 2024 17:53:44 +0300 Subject: [PATCH 1244/1418] cifs: open_cached_dir(): add FILE_READ_EA to desired access [ Upstream commit f1b8224b4e6ed59e7e6f5c548673c67410098d8d ] Since smb2_query_eas() reads EA and uses cached directory, open_cached_dir() should request FILE_READ_EA access. Otherwise listxattr() and getxattr() will fail with EACCES (0xc0000022 STATUS_ACCESS_DENIED SMB status). Link: https://bugzilla.kernel.org/show_bug.cgi?id=218543 Cc: stable@vger.kernel.org Signed-off-by: Eugene Korenevsky Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cached_dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fd082151c5f9b..86fe433b1d324 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -218,7 +218,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, .tcon = tcon, .path = path, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE), - .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES, + .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES | + FILE_READ_EA, .disposition = FILE_OPEN, .fid = pfid, }; -- GitLab From 9f23176ad72ff28adadd638fd1236419081ebc63 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Mar 2024 13:54:57 +0100 Subject: [PATCH 1245/1418] cpufreq: dt: always allocate zeroed cpumask [ Upstream commit d2399501c2c081eac703ca9597ceb83c7875a537 ] Commit 0499a78369ad ("ARM64: Dynamically allocate cpumasks and increase supported CPUs to 512") changed the handling of cpumasks on ARM 64bit, what resulted in the strange issues and warnings during cpufreq-dt initialization on some big.LITTLE platforms. This was caused by mixing OPPs between big and LITTLE cores, because OPP-sharing information between big and LITTLE cores is computed on cpumask, which in turn was not zeroed on allocation. Fix this by switching to zalloc_cpumask_var() call. Fixes: dc279ac6e5b4 ("cpufreq: dt: Refactor initialization to handle probe deferral properly") CC: stable@vger.kernel.org # v5.10+ Signed-off-by: Marek Szyprowski Reviewed-by: Christoph Lameter (Ampere) Reviewed-by: Dhruva Gole Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq-dt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 4aec4b2a52259..8f8f1949d66f6 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -208,7 +208,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) if (!priv) return -ENOMEM; - if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) return -ENOMEM; cpumask_set_cpu(cpu, priv->cpus); -- GitLab From efd67e570ae77abf674a3d5e69192928f1f61e55 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 15 Mar 2024 22:42:27 +0100 Subject: [PATCH 1246/1418] x86/CPU/AMD: Update the Zenbleed microcode revisions [ Upstream commit 5c84b051bd4e777cf37aaff983277e58c99618d5 ] Update them to the correct revision numbers. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Signed-off-by: Borislav Petkov (AMD) Cc: Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/amd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c1d09c8844d67..425092806f8fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -997,11 +997,11 @@ static bool cpu_has_zenbleed_microcode(void) u32 good_rev = 0; switch (boot_cpu_data.x86_model) { - case 0x30 ... 0x3f: good_rev = 0x0830107a; break; - case 0x60 ... 0x67: good_rev = 0x0860010b; break; - case 0x68 ... 0x6f: good_rev = 0x08608105; break; - case 0x70 ... 0x7f: good_rev = 0x08701032; break; - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + case 0x30 ... 0x3f: good_rev = 0x0830107b; break; + case 0x60 ... 0x67: good_rev = 0x0860010c; break; + case 0x68 ... 0x6f: good_rev = 0x08608107; break; + case 0x70 ... 0x7f: good_rev = 0x08701033; break; + case 0xa0 ... 0xaf: good_rev = 0x08a00009; break; default: return false; -- GitLab From 6bcf9d2cade110ab6a18f3f39210243a053df5a7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 22 Feb 2024 12:28:28 -0500 Subject: [PATCH 1247/1418] NFSD: Fix nfsd_clid_class use of __string_len() macro [ Upstream commit 9388a2aa453321bcf1ad2603959debea9e6ab6d4 ] I'm working on restructuring the __string* macros so that it doesn't need to recalculate the string twice. That is, it will save it off when processing __string() and the __assign_str() will not need to do the work again as it currently does. Currently __string_len(item, src, len) doesn't actually use "src", but my changes will require src to be correct as that is where the __assign_str() will get its value from. The event class nfsd_clid_class has: __string_len(name, name, clp->cl_name.len) But the second "name" does not exist and causes my changes to fail to build. That second parameter should be: clp->cl_name.data. Link: https://lore.kernel.org/linux-trace-kernel/20240222122828.3d8d213c@gandalf.local.home Cc: Neil Brown Cc: Olga Kornievskaia Cc: Dai Ngo Cc: Tom Talpey Cc: stable@vger.kernel.org Fixes: d27b74a8675ca ("NFSD: Use new __string_len C macros for nfsd_clid_class") Acked-by: Chuck Lever Acked-by: Jeff Layton Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- fs/nfsd/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 4183819ea0829..84f26f281fe9f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -842,7 +842,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __array(unsigned char, addr, sizeof(struct sockaddr_in6)) __field(unsigned long, flavor) __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - __string_len(name, name, clp->cl_name.len) + __string_len(name, clp->cl_name.data, clp->cl_name.len) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; -- GitLab From 0e21852b888d0f040dab07122e70f6f297f77340 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 13 Mar 2024 09:34:54 -0400 Subject: [PATCH 1248/1418] net: hns3: tracing: fix hclgevf trace event strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f9952e8d80cca2da3b47ecd5ad9ec16cfd1a649 ] The __string() and __assign_str() helper macros of the TRACE_EVENT() macro are going through some optimizations where only the source string of __string() will be used and the __assign_str() source will be ignored and later removed. To make sure that there's no issues, a new check is added between the __string() src argument and the __assign_str() src argument that does a strcmp() to make sure they are the same string. The hclgevf trace events have: __assign_str(devname, &hdev->nic.kinfo.netdev->name); Which triggers the warning: hclgevf_trace.h:34:39: error: passing argument 1 of ‘strcmp’ from incompatible pointer type [-Werror=incompatible-pointer-types] 34 | __assign_str(devname, &hdev->nic.kinfo.netdev->name); [..] arch/x86/include/asm/string_64.h:75:24: note: expected ‘const char *’ but argument is of type ‘char (*)[16]’ 75 | int strcmp(const char *cs, const char *ct); | ~~~~~~~~~~~~^~ Because __assign_str() now has: WARN_ON_ONCE(__builtin_constant_p(src) ? \ strcmp((src), __data_offsets.dst##_ptr_) : \ (src) != __data_offsets.dst##_ptr_); \ The problem is the '&' on hdev->nic.kinfo.netdev->name. That's because that name is: char name[IFNAMSIZ] Where passing an address '&' of a char array is not compatible with strcmp(). The '&' is not necessary, remove it. Link: https://lore.kernel.org/linux-trace-kernel/20240313093454.3909afe7@gandalf.local.home Cc: netdev Cc: Yisen Zhuang Cc: Salil Mehta Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Yufeng Mo Cc: Huazhong Tan Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Reviewed-by: Jijie Shao Fixes: d8355240cf8fb ("net: hns3: add trace event support for PF/VF mailbox") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h | 8 ++++---- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h index 8510b88d49820..f3cd5a376eca9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h @@ -24,7 +24,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_GET_MBX_LEN) ), @@ -33,7 +33,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), @@ -56,7 +56,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h index 5d4895bb57a17..b259e95dd53c2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h @@ -23,7 +23,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_GET_MBX_LEN) ), @@ -31,7 +31,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), @@ -55,7 +55,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), -- GitLab From 77eae2d257d63c291090b96b0dadcda00115eeb6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: [PATCH 1249/1418] LoongArch: Change __my_cpu_offset definition to avoid mis-optimization [ Upstream commit c87e12e0e8c1241410e758e181ca6bf23efa5b5b ] From GCC commit 3f13154553f8546a ("df-scan: remove ad-hoc handling of global regs in asms"), global registers will no longer be forced to add to the def-use chain. Then current_thread_info(), current_stack_pointer and __my_cpu_offset may be lifted out of the loop because they are no longer treated as "volatile variables". This optimization is still correct for the current_thread_info() and current_stack_pointer usages because they are associated to a thread. However it is wrong for __my_cpu_offset because it is associated to a CPU rather than a thread: if the thread migrates to a different CPU in the loop, __my_cpu_offset should be changed. Change __my_cpu_offset definition to treat it as a "volatile variable", in order to avoid such a mis-optimization. Cc: stable@vger.kernel.org Reported-by: Xiaotian Wu Reported-by: Miao Wang Signed-off-by: Xing Li Signed-off-by: Hongchen Zhang Signed-off-by: Rui Wang Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/percpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 302f0e33975a2..c90c560941685 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -25,7 +25,12 @@ static inline void set_my_cpu_offset(unsigned long off) __my_cpu_offset = off; csr_write64(off, PERCPU_BASE_KS); } -#define __my_cpu_offset __my_cpu_offset + +#define __my_cpu_offset \ +({ \ + __asm__ __volatile__("":"+r"(__my_cpu_offset)); \ + __my_cpu_offset; \ +}) #define PERCPU_OP(op, asm_op, c_op) \ static __always_inline unsigned long __percpu_##op(void *ptr, \ -- GitLab From 97cd43ba824aec764f5ea2790d0c0a318f885167 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: [PATCH 1250/1418] LoongArch: Define the __io_aw() hook as mmiowb() [ Upstream commit 9c68ece8b2a5c5ff9b2fcaea923dd73efeb174cd ] Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of mmiowb()") remove all mmiowb() in drivers, but it says: "NOTE: mmiowb() has only ever guaranteed ordering in conjunction with spin_unlock(). However, pairing each mmiowb() removal in this patch with the corresponding call to spin_unlock() is not at all trivial, so there is a small chance that this change may regress any drivers incorrectly relying on mmiowb() to order MMIO writes between CPUs using lock-free synchronisation." The mmio in radeon_ring_commit() is protected by a mutex rather than a spinlock, but in the mutex fastpath it behaves similar to spinlock. We can add mmiowb() calls in the radeon driver but the maintainer says he doesn't like such a workaround, and radeon is not the only example of mutex protected mmio. So we should extend the mmiowb tracking system from spinlock to mutex, and maybe other locking primitives. This is not easy and error prone, so we solve it in the architectural code, by simply defining the __io_aw() hook as mmiowb(). And we no longer need to override queued_spin_unlock() so use the generic definition. Without this, we get such an error when run 'glxgears' on weak ordering architectures such as LoongArch: radeon 0000:04:00.0: ring 0 stalled for more than 10324msec radeon 0000:04:00.0: ring 3 stalled for more than 10240msec radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3) radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.com/T/#t Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loongson.cn/T/#t Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 402a7d9e3a53e..427d147f30d7f 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -72,6 +72,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) #define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) +#define __io_aw() mmiowb() + #include #define ARCH_HAS_VALID_PHYS_ADDR_RANGE -- GitLab From b7cea3a9af0853fdbb1b16633a458f991dde6aac Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 14 Mar 2024 16:49:09 -0600 Subject: [PATCH 1251/1418] wireguard: netlink: check for dangling peer via is_dead instead of empty list [ Upstream commit 55b6c738673871c9b0edae05d0c97995c1ff08c4 ] If all peers are removed via wg_peer_remove_all(), rather than setting peer_list to empty, the peer is added to a temporary list with a head on the stack of wg_peer_remove_all(). If a netlink dump is resumed and the cursored peer is one that has been removed via wg_peer_remove_all(), it will iterate from that peer and then attempt to dump freed peers. Fix this by instead checking peer->is_dead, which was explictly created for this purpose. Also move up the device_update_lock lockdep assertion, since reading is_dead relies on that. It can be reproduced by a small script like: echo "Setting config..." ip link add dev wg0 type wireguard wg setconf wg0 /big-config ( while true; do echo "Showing config..." wg showconf wg0 > /dev/null done ) & sleep 4 wg setconf wg0 <(printf "[Peer]\nPublicKey=$(wg genkey)\n") Resulting in: BUG: KASAN: slab-use-after-free in __lock_acquire+0x182a/0x1b20 Read of size 8 at addr ffff88811956ec70 by task wg/59 CPU: 2 PID: 59 Comm: wg Not tainted 6.8.0-rc2-debug+ #5 Call Trace: dump_stack_lvl+0x47/0x70 print_address_description.constprop.0+0x2c/0x380 print_report+0xab/0x250 kasan_report+0xba/0xf0 __lock_acquire+0x182a/0x1b20 lock_acquire+0x191/0x4b0 down_read+0x80/0x440 get_peer+0x140/0xcb0 wg_get_device_dump+0x471/0x1130 Cc: stable@vger.kernel.org Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Reported-by: Lillian Berry Signed-off-by: Jason A. Donenfeld Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wireguard/netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 6d1bd9f52d02a..81eef56773a23 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -255,17 +255,17 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!peers_nest) goto out; ret = 0; - /* If the last cursor was removed via list_del_init in peer_remove, then + lockdep_assert_held(&wg->device_update_lock); + /* If the last cursor was removed in peer_remove or peer_remove_all, then * we just treat this the same as there being no more peers left. The * reason is that seq_nr should indicate to userspace that this isn't a * coherent dump anyway, so they'll try again. */ if (list_empty(&wg->peer_list) || - (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + (ctx->next_peer && ctx->next_peer->is_dead)) { nla_nest_cancel(skb, peers_nest); goto out; } - lockdep_assert_held(&wg->device_update_lock); peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { if (get_peer(peer, skb, ctx)) { -- GitLab From 09c3fa70f65175861ca948cb2f0f791e666c90e5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 14 Mar 2024 16:49:10 -0600 Subject: [PATCH 1252/1418] wireguard: netlink: access device through ctx instead of peer [ Upstream commit 71cbd32e3db82ea4a74e3ef9aeeaa6971969c86f ] The previous commit fixed a bug that led to a NULL peer->device being dereferenced. It's actually easier and faster performance-wise to instead get the device from ctx->wg. This semantically makes more sense too, since ctx->wg->peer_allowedips.seq is compared with ctx->allowedips_seq, basing them both in ctx. This also acts as a defence in depth provision against freed peers. Cc: stable@vger.kernel.org Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wireguard/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 81eef56773a23..81b716e6612e2 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -164,8 +164,8 @@ get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) if (!allowedips_node) goto no_allowedips; if (!ctx->allowedips_seq) - ctx->allowedips_seq = peer->device->peer_allowedips.seq; - else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + ctx->allowedips_seq = ctx->wg->peer_allowedips.seq; + else if (ctx->allowedips_seq != ctx->wg->peer_allowedips.seq) goto no_allowedips; allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); -- GitLab From 25bc986570219f98ded2bf9b335191c80948508b Mon Sep 17 00:00:00 2001 From: "Andrey Jr. Melnikov" Date: Wed, 14 Feb 2024 17:57:57 +0100 Subject: [PATCH 1253/1418] ahci: asm1064: correct count of reported ports [ Upstream commit 9815e39617541ef52d0dfac4be274ad378c6dc09 ] The ASM1064 SATA host controller always reports wrongly, that it has 24 ports. But in reality, it only has four ports. before: ahci 0000:04:00.0: SSS flag set, parallel bus scan disabled ahci 0000:04:00.0: AHCI 0001.0301 32 slots 24 ports 6 Gbps 0xffff0f impl SATA mode ahci 0000:04:00.0: flags: 64bit ncq sntf stag pm led only pio sxs deso sadm sds apst after: ahci 0000:04:00.0: ASM1064 has only four ports ahci 0000:04:00.0: forcing port_map 0xffff0f -> 0xf ahci 0000:04:00.0: SSS flag set, parallel bus scan disabled ahci 0000:04:00.0: AHCI 0001.0301 32 slots 24 ports 6 Gbps 0xf impl SATA mode ahci 0000:04:00.0: flags: 64bit ncq sntf stag pm led only pio sxs deso sadm sds apst Signed-off-by: "Andrey Jr. Melnikov" Signed-off-by: Niklas Cassel Stable-dep-of: 6cd8adc3e189 ("ahci: asm1064: asm1166: don't limit reported ports") Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 1790a2ecb9fac..9de1731b6b444 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -671,9 +671,17 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - hpriv->saved_port_map = 0x3f; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) { + switch (pdev->device) { + case 0x1166: + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + break; + case 0x1064: + dev_info(&pdev->dev, "ASM1064 has only four ports\n"); + hpriv->saved_port_map = 0xf; + break; + } } if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { -- GitLab From 139845240d2731733a4f548e8da2c61d7d708bf5 Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Wed, 13 Mar 2024 22:46:50 +0100 Subject: [PATCH 1254/1418] ahci: asm1064: asm1166: don't limit reported ports [ Upstream commit 6cd8adc3e18960f6e59d797285ed34ef473cc896 ] Previously, patches have been added to limit the reported count of SATA ports for asm1064 and asm1166 SATA controllers, as those controllers do report more ports than physically having. While it is allowed to report more ports than physically having in CAP.NP, it is not allowed to report more ports than physically having in the PI (Ports Implemented) register, which is what these HBAs do. (This is a AHCI spec violation.) Unfortunately, it seems that the PMP implementation in these ASMedia HBAs is also violating the AHCI and SATA-IO PMP specification. What these HBAs do is that they do not report that they support PMP (CAP.SPM (Supports Port Multiplier) is not set). Instead, they have decided to add extra "virtual" ports in the PI register that is used if a port multiplier is connected to any of the physical ports of the HBA. Enumerating the devices behind the PMP as specified in the AHCI and SATA-IO specifications, by using PMP READ and PMP WRITE commands to the physical ports of the HBA is not possible, you have to use the "virtual" ports. This is of course bad, because this gives us no way to detect the device and vendor ID of the PMP actually connected to the HBA, which means that we can not apply the proper PMP quirks for the PMP that is connected to the HBA. Limiting the port map will thus stop these controllers from working with SATA Port Multipliers. This patch reverts both patches for asm1064 and asm1166, so old behavior is restored and SATA PMP will work again, but it will also reintroduce the (minutes long) extra boot time for the ASMedia controllers that do not have a PMP connected (either on the PCIe card itself, or an external PMP). However, a longer boot time for some, is the lesser evil compared to some other users not being able to detect their drives at all. Fixes: 0077a504e1a4 ("ahci: asm1166: correct count of reported ports") Fixes: 9815e3961754 ("ahci: asm1064: correct count of reported ports") Cc: stable@vger.kernel.org Reported-by: Matt Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede [cassel: rewrote commit message] Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 9de1731b6b444..17119e8dc8c30 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -671,19 +671,6 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) { - switch (pdev->device) { - case 0x1166: - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - hpriv->saved_port_map = 0x3f; - break; - case 0x1064: - dev_info(&pdev->dev, "ASM1064 has only four ports\n"); - hpriv->saved_port_map = 0xf; - break; - } - } - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; -- GitLab From 589c414138a1bed98e652c905937d8f790804efe Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 11 Mar 2024 18:07:34 -0400 Subject: [PATCH 1255/1418] drm/amdgpu: amdgpu_ttm_gart_bind set gtt bound flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6c6064cbe58b43533e3451ad6a8ba9736c109ac3 ] Otherwise after the GTT bo is released, the GTT and gart space is freed but amdgpu_ttm_backend_unbind will not clear the gart page table entry and leave valid mapping entry pointing to the stale system page. Then if GPU access the gart address mistakely, it will read undefined value instead page fault, harder to debug and reproduce the real issue. Cc: stable@vger.kernel.org Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 158b791883f03..dfb9d42007730 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -838,6 +838,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); } + gtt->bound = true; } /* -- GitLab From 0581a4dac6616b80712f6524efcd4ddaf6ca1942 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 14 Feb 2024 13:29:51 -0700 Subject: [PATCH 1256/1418] drm/amd/display: Return the correct HDCP error code [ Upstream commit e64b3f55e458ce7e2087a0051f47edabf74545e7 ] [WHY & HOW] If the display is null when creating an HDCP session, return a proper error code. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index ee67a35c2a8ed..ff930a71e496a 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -513,6 +513,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp) hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index; if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0) -- GitLab From 3070c70a3e46165e4828d519944694694d76a963 Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Fri, 28 Jul 2023 08:35:07 -0400 Subject: [PATCH 1257/1418] drm/amd/display: Fix noise issue on HDMI AV mute [ Upstream commit 69e3be6893a7e668660b05a966bead82bbddb01d ] [Why] When mode switching is triggered there is momentary noise visible on some HDMI TV or displays. [How] Wait for 2 frames to make sure we have enough time to send out AV mute and sink receives a full frame. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu Acked-by: Wayne Lin Signed-off-by: Leo Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 72bec33e371f3..0225b2c96041d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -651,10 +651,20 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) if (pipe_ctx == NULL) return; - if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) + if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) { pipe_ctx->stream_res.stream_enc->funcs->set_avmute( pipe_ctx->stream_res.stream_enc, enable); + + /* Wait for two frame to make sure AV mute is sent out */ + if (enable) { + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + } + } } void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx) -- GitLab From 3d47eb405781cc5127deca9a14e24b27696087a1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Mar 2024 18:43:11 +0100 Subject: [PATCH 1258/1418] dm snapshot: fix lockup in dm_exception_table_exit [ Upstream commit 6e7132ed3c07bd8a6ce3db4bb307ef2852b322dc ] There was reported lockup when we exit a snapshot with many exceptions. Fix this by adding "cond_resched" to the loop that frees the exceptions. Reported-by: John Pittman Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-snap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b748901a4fb55..1c601508ce0b4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -679,8 +679,10 @@ static void dm_exception_table_exit(struct dm_exception_table *et, for (i = 0; i < size; i++) { slot = et->table + i; - hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { kmem_cache_free(mem, ex); + cond_resched(); + } } kvfree(et->table); -- GitLab From 62e06fdd180a61560dc844aac89b9aafde2348f2 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 14 Mar 2024 14:26:56 +0000 Subject: [PATCH 1259/1418] x86/pm: Work around false positive kmemleak report in msr_build_context() [ Upstream commit e3f269ed0accbb22aa8f25d2daffa23c3fccd407 ] Since: 7ee18d677989 ("x86/power: Make restore_processor_context() sane") kmemleak reports this issue: unreferenced object 0xf68241e0 (size 32): comm "swapper/0", pid 1, jiffies 4294668610 (age 68.432s) hex dump (first 32 bytes): 00 cc cc cc 29 10 01 c0 00 00 00 00 00 00 00 00 ....)........... 00 42 82 f6 cc cc cc cc cc cc cc cc cc cc cc cc .B.............. backtrace: [<461c1d50>] __kmem_cache_alloc_node+0x106/0x260 [] __kmalloc+0x54/0x160 [] msr_build_context.constprop.0+0x35/0x100 [<46635aff>] pm_check_save_msr+0x63/0x80 [<6b6bb938>] do_one_initcall+0x41/0x1f0 [<3f3add60>] kernel_init_freeable+0x199/0x1e8 [<3b538fde>] kernel_init+0x1a/0x110 [<938ae2b2>] ret_from_fork+0x1c/0x28 Which is a false positive. Reproducer: - Run rsync of whole kernel tree (multiple times if needed). - start a kmemleak scan - Note this is just an example: a lot of our internal tests hit these. The root cause is similar to the fix in: b0b592cf0836 x86/pm: Fix false positive kmemleak report in msr_build_context() ie. the alignment within the packed struct saved_context which has everything unaligned as there is only "u16 gs;" at start of struct where in the past there were four u16 there thus aligning everything afterwards. The issue is with the fact that Kmemleak only searches for pointers that are aligned (see how pointers are scanned in kmemleak.c) so when the struct members are not aligned it doesn't see them. Testing: We run a lot of tests with our CI, and after applying this fix we do not see any kmemleak issues any more whilst without it we see hundreds of the above report. From a single, simple test run consisting of 416 individual test cases on kernel 5.10 x86 with kmemleak enabled we got 20 failures due to this, which is quite a lot. With this fix applied we get zero kmemleak related failures. Fixes: 7ee18d677989 ("x86/power: Make restore_processor_context() sane") Signed-off-by: Anton Altaparmakov Signed-off-by: Ingo Molnar Acked-by: "Rafael J. Wysocki" Cc: stable@vger.kernel.org Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240314142656.17699-1-anton@tuxera.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/suspend_32.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index a800abb1a9925..d8416b3bf832e 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,11 +12,6 @@ /* image of the saved processor state */ struct saved_context { - /* - * On x86_32, all segment registers except gs are saved at kernel - * entry in pt_regs. - */ - u16 gs; unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; struct saved_msrs saved_msrs; @@ -27,6 +22,11 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + /* + * On x86_32, all segment registers except gs are saved at kernel + * entry in pt_regs. + */ + u16 gs; bool misc_enable_saved; } __attribute__((packed)); -- GitLab From e82dc11315a7a113dc695e4feca54ecd5f1570b4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Mar 2024 15:21:45 +0100 Subject: [PATCH 1260/1418] cpufreq: brcmstb-avs-cpufreq: fix up "add check for cpufreq_cpu_get's return value" In commit e72160cb6e23 ("cpufreq: brcmstb-avs-cpufreq: add check for cpufreq_cpu_get's return value"), build warnings occur because a variable is created after some logic, resulting in: drivers/cpufreq/brcmstb-avs-cpufreq.c: In function 'brcm_avs_cpufreq_get': drivers/cpufreq/brcmstb-avs-cpufreq.c:486:9: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 486 | struct private_data *priv = policy->driver_data; | ^~~~~~ cc1: all warnings being treated as errors make[2]: *** [scripts/Makefile.build:289: drivers/cpufreq/brcmstb-avs-cpufreq.o] Error 1 make[1]: *** [scripts/Makefile.build:552: drivers/cpufreq] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:1907: drivers] Error 2 Fix this up. Link: https://lore.kernel.org/r/e114d9e5-26af-42be-9baa-72c3a6ec8fe5@oracle.com Link: https://lore.kernel.org/stable/20240327015023.GC7502@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/T/#m15bff0fe96986ef780e848b4fff362bf8ea03f08 Reported-by: Harshit Mogalapalli Reported-by: Linux Kernel Functional Testing Fixes: e72160cb6e23 ("cpufreq: brcmstb-avs-cpufreq: add check for cpufreq_cpu_get's return value") Cc: Anastasia Belova Cc: Viresh Kumar Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 38ec0fedb247f..552db816ed22c 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,10 +481,11 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct private_data *priv; + if (!policy) return 0; - struct private_data *priv = policy->driver_data; - + priv = policy->driver_data; cpufreq_cpu_put(policy); return brcm_avs_get_frequency(priv->base); -- GitLab From 36c676e2ed3609dc8d50ba763eee2ba4e4b493c2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 4 Mar 2024 14:43:55 +0100 Subject: [PATCH 1261/1418] platform/x86: p2sb: On Goldmont only cache P2SB and SPI devfn BAR commit aec7d25b497ce4a8d044e9496de0aa433f7f8f06 upstream. On Goldmont p2sb_bar() only ever gets called for 2 devices, the actual P2SB devfn 13,0 and the SPI controller which is part of the P2SB, devfn 13,2. But the current p2sb code tries to cache BAR0 info for all of devfn 13,0 to 13,7 . This involves calling pci_scan_single_device() for device 13 functions 0-7 and the hw does not seem to like pci_scan_single_device() getting called for some of the other hidden devices. E.g. on an ASUS VivoBook D540NV-GQ065T this leads to continuous ACPI errors leading to high CPU usage. Fix this by only caching BAR0 info and thus only calling pci_scan_single_device() for the P2SB and the SPI controller. Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe") Reported-by: Danil Rybakov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218531 Tested-by: Danil Rybakov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240304134356.305375-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/p2sb.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 17cc4b45e0239..a64f56ddd4a44 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -20,9 +20,11 @@ #define P2SBC_HIDE BIT(8) #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) +#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) +#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) static const struct x86_cpu_id p2sb_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), {} }; @@ -98,21 +100,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) { - unsigned int slot, fn; - - if (PCI_FUNC(devfn) == 0) { - /* - * When function number of the P2SB device is zero, scan it and - * other function numbers, and if devices are available, cache - * their BAR0s. - */ - slot = PCI_SLOT(devfn); - for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) - p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); - } else { - /* Scan the P2SB device and cache its BAR0 */ - p2sb_scan_and_cache_devfn(bus, devfn); - } + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + + /* On Goldmont p2sb_bar() also gets called for the SPI controller */ + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) return -ENOENT; -- GitLab From 196f198ca6fce04ba6ce262f5a0e4d567d7d219d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:20 -0800 Subject: [PATCH 1262/1418] tls: fix race between tx work scheduling and socket close commit e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb upstream. Similarly to previous commit, the submitting thread (recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete(). Reorder scheduling the work before calling complete(). This seems more logical in the first place, as it's the inverse order of what the submitting thread will do. Reported-by: valis Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller [Lee: Fixed merge-conflict in Stable branches linux-6.1.y and older] Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2bd27b77769cb..d53587ff9ddea 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -449,7 +449,6 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct scatterlist *sge; struct sk_msg *msg_en; struct tls_rec *rec; - bool ready = false; struct sock *sk; rec = container_of(aead_req, struct tls_rec, aead_req); @@ -486,19 +485,16 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - - if (!ready) - return; - - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); } static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) -- GitLab From 406b0241d0eb598a0b330ab20ae325537d8d8163 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Mar 2024 14:22:12 +0100 Subject: [PATCH 1263/1418] netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout commit 552705a3650bbf46a22b1adedc1b04181490fc36 upstream. While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0a86c019a75de..4e0a34fbede38 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5132,6 +5132,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); -- GitLab From 72c1efe3f247a581667b7d368fff3bd9a03cd57a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 1 Mar 2024 00:11:10 +0100 Subject: [PATCH 1264/1418] netfilter: nf_tables: disallow anonymous set with timeout flag commit 16603605b667b70da974bea8216c93e7db043bf1 upstream. Anonymous sets are never used with timeout from userspace, reject this. Exception to this rule is NFT_SET_EVAL to ensure legacy meters still work. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4e0a34fbede38..1706605139e36 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4711,6 +4711,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; -- GitLab From 73e4f955b3f8daf0fcc548f945d9bb50024fb1de Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 1 Mar 2024 01:04:11 +0100 Subject: [PATCH 1265/1418] netfilter: nf_tables: reject constant set with timeout commit 5f4fc4bd5cddb4770ab120ce44f02695c4505562 upstream. This set combination is weird: it allows for elements to be added/deleted, but once bound to the rule it cannot be updated anymore. Eventually, all elements expire, leading to an empty set which cannot be updated anymore. Reject this flags combination. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1706605139e36..2a5d9075a081d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4714,6 +4714,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; -- GitLab From 4f34b79c77ec49263e1630ab2d8cacf9e9a3dcba Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 28 Feb 2024 16:45:33 -0800 Subject: [PATCH 1266/1418] Drivers: hv: vmbus: Calculate ring buffer size for more efficient use of memory commit b8209544296edbd1af186e2ea9c648642c37b18c upstream. The VMBUS_RING_SIZE macro adds space for a ring buffer header to the requested ring buffer size. The header size is always 1 page, and so its size varies based on the PAGE_SIZE for which the kernel is built. If the requested ring buffer size is a large power-of-2 size and the header size is small, the resulting size is inefficient in its use of memory. For example, a 512 Kbyte ring buffer with a 4 Kbyte page size results in a 516 Kbyte allocation, which is rounded to up 1 Mbyte by the memory allocator, and wastes 508 Kbytes of memory. In such situations, the exact size of the ring buffer isn't that important, and it's OK to allocate the 4 Kbyte header at the beginning of the 512 Kbytes, leaving the ring buffer itself with just 508 Kbytes. The memory allocation can be 512 Kbytes instead of 1 Mbyte and nothing is wasted. Update VMBUS_RING_SIZE to implement this approach for "large" ring buffer sizes. "Large" is somewhat arbitrarily defined as 8 times the size of the ring buffer header (which is of size PAGE_SIZE). For example, for 4 Kbyte PAGE_SIZE, ring buffers of 32 Kbytes and larger use the first 4 Kbytes as the ring buffer header. For 64 Kbyte PAGE_SIZE, ring buffers of 512 Kbytes and larger use the first 64 Kbytes as the ring buffer header. In both cases, smaller sizes add space for the header so the ring size isn't reduced too much by using part of the space for the header. For example, with a 64 Kbyte page size, we don't want a 128 Kbyte ring buffer to be reduced to 64 Kbytes by allocating half of the space for the header. In such a case, the memory allocation is less efficient, but it's the best that can be done. While the new algorithm slightly changes the amount of space allocated for ring buffers by drivers that use VMBUS_RING_SIZE, the devices aren't known to be sensitive to small changes in ring buffer size, so there shouldn't be any effect. Fixes: c1135c7fd0e9 ("Drivers: hv: vmbus: Introduce types of GPADL") Fixes: 6941f67ad37d ("hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218502 Cc: stable@vger.kernel.org Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Reviewed-by: Dexuan Cui Tested-by: Souradeep Chakrabarti Link: https://lore.kernel.org/r/20240229004533.313662-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240229004533.313662-1-mhklinux@outlook.com> Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 646f1da9f27e0..4fbd5d8417111 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { -- GitLab From 408837d7ef6698e3cec90ff9ccecb18e66880531 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 21 Feb 2024 14:46:21 -0700 Subject: [PATCH 1267/1418] xfrm: Avoid clang fortify warning in copy_to_user_tmpl() commit 1a807e46aa93ebad1dfbed4f82dc3bf779423a6e upstream. After a couple recent changes in LLVM, there is a warning (or error with CONFIG_WERROR=y or W=e) from the compile time fortify source routines, specifically the memset() in copy_to_user_tmpl(). In file included from net/xfrm/xfrm_user.c:14: ... include/linux/fortify-string.h:438:4: error: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror,-Wattribute-warning] 438 | __write_overflow_field(p_size_field, size); | ^ 1 error generated. While ->xfrm_nr has been validated against XFRM_MAX_DEPTH when its value is first assigned in copy_templates() by calling validate_tmpl() first (so there should not be any issue in practice), LLVM/clang cannot really deduce that across the boundaries of these functions. Without that knowledge, it cannot assume that the loop stops before i is greater than XFRM_MAX_DEPTH, which would indeed result a stack buffer overflow in the memset(). To make the bounds of ->xfrm_nr clear to the compiler and add additional defense in case copy_to_user_tmpl() is ever used in a path where ->xfrm_nr has not been properly validated against XFRM_MAX_DEPTH first, add an explicit bound check and early return, which clears up the warning. Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1985 Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d042ca01211fa..0cc4ed29e9015 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1979,6 +1979,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) if (xp->xfrm_nr == 0) return 0; + if (xp->xfrm_nr > XFRM_MAX_DEPTH) + return -ENOBUFS; + for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; -- GitLab From d24a79ccdf25d35ed9515483225fabcdba1e24c4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Feb 2024 09:08:27 -0800 Subject: [PATCH 1268/1418] init/Kconfig: lower GCC version check for -Warray-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3e00f5802fabf2f504070a591b14b648523ede13 upstream. We continue to see false positives from -Warray-bounds even in GCC 10, which is getting reported in a few places[1] still: security/security.c:811:2: warning: `memcpy' offset 32 is out of the bounds [0, 0] [-Warray-bounds] Lower the GCC version check from 11 to 10. Link: https://lkml.kernel.org/r/20240223170824.work.768-kees@kernel.org Reported-by: Lu Yao Closes: https://lore.kernel.org/lkml/20240117014541.8887-1-yaolu@kylinos.cn/ Link: https://lore.kernel.org/linux-next/65d84438.620a0220.7d171.81a7@mx.google.com [1] Signed-off-by: Kees Cook Reviewed-by: Paul Moore Cc: Ard Biesheuvel Cc: Christophe Leroy Cc: Greg Kroah-Hartman Cc: "Gustavo A. R. Silva" Cc: Johannes Weiner Cc: Marc Aurèle La France Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nhat Pham Cc: Petr Mladek Cc: Randy Dunlap Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index ffb927bf6034f..b63dce6706c5c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -902,14 +902,14 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-11+ array-bounds globally. +# Currently, disable gcc-10+ array-bounds globally. # It's still broken in gcc-13, so no upper bound yet. -config GCC11_NO_ARRAY_BOUNDS +config GCC10_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS # # For architectures that know their GCC __int128 support is sound -- GitLab From 726374dde5d608b15b9756bd52b6fc283fda7a06 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 14 Feb 2024 17:00:03 -0800 Subject: [PATCH 1269/1418] KVM: x86: Mark target gfn of emulated atomic instruction as dirty commit 910c57dfa4d113aae6571c2a8b9ae8c430975902 upstream. When emulating an atomic access on behalf of the guest, mark the target gfn dirty if the CMPXCHG by KVM is attempted and doesn't fault. This fixes a bug where KVM effectively corrupts guest memory during live migration by writing to guest memory without informing userspace that the page is dirty. Marking the page dirty got unintentionally dropped when KVM's emulated CMPXCHG was converted to do a user access. Before that, KVM explicitly mapped the guest page into kernel memory, and marked the page dirty during the unmap phase. Mark the page dirty even if the CMPXCHG fails, as the old data is written back on failure, i.e. the page is still written. The value written is guaranteed to be the same because the operation is atomic, but KVM's ABI is that all writes are dirty logged regardless of the value written. And more importantly, that's what KVM did before the buggy commit. Huge kudos to the folks on the Cc list (and many others), who did all the actual work of triaging and debugging. Fixes: 1c2361f667f3 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses") Cc: stable@vger.kernel.org Cc: David Matlack Cc: Pasha Tatashin Cc: Michael Krebs base-commit: 6769ea8da8a93ed4630f1ce64df6aafcaabfce64 Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20240215010004.1456078-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 688bc7b72eb66..0e6e63a8f0949 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7758,6 +7758,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (r < 0) return X86EMUL_UNHANDLEABLE; + + /* + * Mark the page dirty _before_ checking whether or not the CMPXCHG was + * successful, as the old value is written back on failure. Note, for + * live migration, this is unnecessarily conservative as CMPXCHG writes + * back the original value and the access is atomic, but KVM's ABI is + * that all writes are dirty logged, regardless of the value written. + */ + kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa)); + if (r) return X86EMUL_CMPXCHG_FAILED; -- GitLab From 4868c0ecdb6cfde7c70cf478c46e06bb9c7e5865 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 Feb 2024 17:34:30 -0800 Subject: [PATCH 1270/1418] KVM: SVM: Flush pages under kvm->lock to fix UAF in svm_register_enc_region() commit 5ef1d8c1ddbf696e47b226e11888eaf8d9e8e807 upstream. Do the cache flush of converted pages in svm_register_enc_region() before dropping kvm->lock to fix use-after-free issues where region and/or its array of pages could be freed by a different task, e.g. if userspace has __unregister_enc_region_locked() already queued up for the region. Note, the "obvious" alternative of using local variables doesn't fully resolve the bug, as region->pages is also dynamically allocated. I.e. the region structure itself would be fine, but region->pages could be freed. Flushing multiple pages under kvm->lock is unfortunate, but the entire flow is a rare slow path, and the manual flush is only needed on CPUs that lack coherency for encrypted memory. Fixes: 19a23da53932 ("Fix unsynchronized access to sev members through svm_register_enc_region") Reported-by: Gabe Kirkpatrick Cc: Josh Eads Cc: Peter Gonda Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20240217013430.2079561-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/sev.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3060fe4e9731a..3dc0ee1fe9db9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1958,20 +1958,22 @@ int sev_mem_enc_register_region(struct kvm *kvm, goto e_free; } - region->uaddr = range->addr; - region->size = range->size; - - list_add_tail(®ion->list, &sev->regions_list); - mutex_unlock(&kvm->lock); - /* * The guest may change the memory encryption attribute from C=0 -> C=1 * or vice versa for this memory range. Lets make sure caches are * flushed to ensure that guest data gets written into memory with - * correct C-bit. + * correct C-bit. Note, this must be done before dropping kvm->lock, + * as region and its array of pages can be freed by a different task + * once kvm->lock is released. */ sev_clflush_pages(region->pages, region->npages); + region->uaddr = range->addr; + region->size = range->size; + + list_add_tail(®ion->list, &sev->regions_list); + mutex_unlock(&kvm->lock); + return ret; e_free: -- GitLab From 3a70b1c9326457c7bd93974d77b2ba55a742e742 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Mar 2024 15:24:05 -0500 Subject: [PATCH 1271/1418] tracing: Use .flush() call to wake up readers commit e5d7c1916562f0e856eb3d6f569629fcd535fed2 upstream. The .release() function does not get called until all readers of a file descriptor are finished. If a thread is blocked on reading a file descriptor in ring_buffer_wait(), and another thread closes the file descriptor, it will not wake up the other thread as ring_buffer_wake_waiters() is called by .release(), and that will not get called until the .read() is finished. The issue originally showed up in trace-cmd, but the readers are actually other processes with their own file descriptors. So calling close() would wake up the other tasks because they are blocked on another descriptor then the one that was closed(). But there's other wake ups that solve that issue. When a thread is blocked on a read, it can still hang even when another thread closed its descriptor. This is what the .flush() callback is for. Have the .flush() wake up the readers. Link: https://lore.kernel.org/linux-trace-kernel/20240308202432.107909457@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f667d6bdddda5..f2b00ea38111a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8278,6 +8278,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return size; } +static int tracing_buffers_flush(struct file *file, fl_owner_t id) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + + iter->wait_index++; + /* Make sure the waiters see the new wait_index */ + smp_wmb(); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + return 0; +} + static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; @@ -8289,12 +8303,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); - iter->wait_index++; - /* Make sure the waiters see the new wait_index */ - smp_wmb(); - - ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); @@ -8508,6 +8516,7 @@ static const struct file_operations tracing_buffers_fops = { .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = tracing_buffers_release, + .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, .llseek = no_llseek, -- GitLab From 7f122486a06107438cd58aa32ab4a55c2bb60a3c Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 1 Mar 2024 15:36:58 +0800 Subject: [PATCH 1272/1418] drm/amdgpu/pm: Fix the error of pwm1_enable setting commit 0dafaf659cc463f2db0af92003313a8bc46781cd upstream. Fix the pwm_mode value error which used for pwm1_enable setting Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 29f3d8431089e..cdb406690b7e7 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2344,6 +2344,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); int err, ret; + u32 pwm_mode; int value; if (amdgpu_in_reset(adev)) @@ -2355,13 +2356,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, if (err) return err; + if (value == 0) + pwm_mode = AMD_FAN_CTRL_NONE; + else if (value == 1) + pwm_mode = AMD_FAN_CTRL_MANUAL; + else if (value == 2) + pwm_mode = AMD_FAN_CTRL_AUTO; + else + return -EINVAL; + ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (ret < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return ret; } - ret = amdgpu_dpm_set_fan_control_mode(adev, value); + ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); -- GitLab From 2eadf1adf513f798172c50fccbaf6ce037ac7e9b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 19 Feb 2024 13:50:47 +0100 Subject: [PATCH 1273/1418] drm/i915: Check before removing mm notifier commit 01bb1ae35006e473138c90711bad1a6b614a1823 upstream. Error in mmu_interval_notifier_insert() can leave a NULL notifier.mm pointer. Catch that and return early. Fixes: ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.") Cc: # v5.13+ [tursulin: Added Fixes and cc stable.] Cc: Andi Shyti Cc: Shawn Lee Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240219125047.28906-1-nirmoy.das@intel.com Signed-off-by: Tvrtko Ursulin (cherry picked from commit db7bbd13f08774cde0332c705f042e327fe21e73) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index ba14b18d65f38..2e7c52c2e47dd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -378,6 +378,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj) { GEM_WARN_ON(obj->userptr.page_ref); + if (!obj->userptr.notifier.mm) + return; + mmu_interval_notifier_remove(&obj->userptr.notifier); obj->userptr.notifier.mm = NULL; } -- GitLab From e5b3ad3e8ee4cc88870353a47d6a14661fe1566b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 1 Mar 2024 15:29:50 +0800 Subject: [PATCH 1274/1418] ALSA: hda/realtek - Fix headset Mic no show at resume back for Lenovo ALC897 platform commit d397b6e56151099cf3b1f7bfccb204a6a8591720 upstream. Headset Mic will no show at resume back. This patch will fix this issue. Fixes: d7f32791a9fc ("ALSA: hda/realtek - Add headset Mic support for Lenovo ALC897 platform") Cc: Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/4713d48a372e47f98bba0c6120fd8254@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6e759032eba2e..0b58536fc15bb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11439,8 +11439,7 @@ static void alc897_hp_automute_hook(struct hda_codec *codec, snd_hda_gen_hp_automute(codec, jack); vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; - snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - vref); + snd_hda_set_pin_ctl(codec, 0x1b, vref); } static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, @@ -11449,6 +11448,10 @@ static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->gen.hp_automute_hook = alc897_hp_automute_hook; + spec->no_shutup_pins = 1; + } + if (action == HDA_FIXUP_ACT_PROBE) { + snd_hda_set_pin_ctl_cache(codec, 0x1a, PIN_IN | AC_PINCTL_VREF_100); } } -- GitLab From f42ba916689f5c7b1642092266d2f53cf527aaaa Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 29 Feb 2024 14:30:06 -0500 Subject: [PATCH 1275/1418] USB: usb-storage: Prevent divide-by-0 error in isd200_ata_command commit 014bcf41d946b36a8f0b8e9b5d9529efbb822f49 upstream. The isd200 sub-driver in usb-storage uses the HEADS and SECTORS values in the ATA ID information to calculate cylinder and head values when creating a CDB for READ or WRITE commands. The calculation involves division and modulus operations, which will cause a crash if either of these values is 0. While this never happens with a genuine device, it could happen with a flawed or subversive emulation, as reported by the syzbot fuzzer. Protect against this possibility by refusing to bind to the device if either the ATA_ID_HEADS or ATA_ID_SECTORS value in the device's ID information is 0. This requires isd200_Initialization() to return a negative error code when initialization fails; currently it always returns 0 (even when there is an error). Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+28748250ab47a8f04100@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-usb/0000000000003eb868061245ba7f@google.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: PrasannaKumar Muralidharan Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/b1e605ea-333f-4ac0-9511-da04f411763e@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/isd200.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 4e0eef1440b7f..300aeef160e75 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1105,7 +1105,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id) static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; - int retStatus = ISD200_GOOD; + int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); @@ -1137,6 +1137,13 @@ static int isd200_get_inquiry_data( struct us_data *us ) isd200_fix_driveid(id); isd200_dump_driveid(us, id); + /* Prevent division by 0 in isd200_scsi_to_ata() */ + if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { + usb_stor_dbg(us, " Invalid ATA Identify data\n"); + retStatus = ISD200_ERROR; + goto Done; + } + memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ @@ -1202,6 +1209,7 @@ static int isd200_get_inquiry_data( struct us_data *us ) } } + Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); @@ -1481,22 +1489,27 @@ static int isd200_init_info(struct us_data *us) static int isd200_Initialization(struct us_data *us) { + int rc = 0; + usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ - if (isd200_init_info(us) == ISD200_ERROR) { + if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); + rc = -ENOMEM; } else { /* Get device specific data */ - if (isd200_get_inquiry_data(us) != ISD200_GOOD) + if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); - else + rc = -EINVAL; + } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); + } } - return 0; + return rc; } -- GitLab From 7664ee8bd80309b90d53488b619764f0a057f2b7 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 28 Feb 2024 17:24:41 +0530 Subject: [PATCH 1276/1418] usb: gadget: ncm: Fix handling of zero block length packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f90ce1e04cbcc76639d6cba0fdbd820cd80b3c70 upstream. While connecting to a Linux host with CDC_NCM_NTB_DEF_SIZE_TX set to 65536, it has been observed that we receive short packets, which come at interval of 5-10 seconds sometimes and have block length zero but still contain 1-2 valid datagrams present. According to the NCM spec: "If wBlockLength = 0x0000, the block is terminated by a short packet. In this case, the USB transfer must still be shorter than dwNtbInMaxSize or dwNtbOutMaxSize. If exactly dwNtbInMaxSize or dwNtbOutMaxSize bytes are sent, and the size is a multiple of wMaxPacketSize for the given pipe, then no ZLP shall be sent. wBlockLength= 0x0000 must be used with extreme care, because of the possibility that the host and device may get out of sync, and because of test issues. wBlockLength = 0x0000 allows the sender to reduce latency by starting to send a very large NTB, and then shortening it when the sender discovers that there’s not sufficient data to justify sending a large NTB" However, there is a potential issue with the current implementation, as it checks for the occurrence of multiple NTBs in a single giveback by verifying if the leftover bytes to be processed is zero or not. If the block length reads zero, we would process the same NTB infintely because the leftover bytes is never zero and it leads to a crash. Fix this by bailing out if block length reads zero. Cc: stable@vger.kernel.org Fixes: 427694cfaafa ("usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240228115441.2105585-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 5e78fcc63e4d3..14601a2d25427 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1348,7 +1348,7 @@ parse_ntb: if (to_process == 1 && (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { to_process--; - } else if (to_process > 0) { + } else if ((to_process > 0) && (block_len != 0)) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } -- GitLab From ccaa4fb8dc4f2fac50aa68698e915fc872b133de Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 23 Feb 2024 01:33:43 +0200 Subject: [PATCH 1277/1418] usb: port: Don't try to peer unused USB ports based on location commit 69c63350e573367f9c8594162288cffa8a26d0d1 upstream. Unused USB ports may have bogus location data in ACPI PLD tables. This causes port peering failures as these unused USB2 and USB3 ports location may match. Due to these failures the driver prints a "usb: port power management may be unreliable" warning, and unnecessarily blocks port power off during runtime suspend. This was debugged on a couple DELL systems where the unused ports all returned zeroes in their location data. Similar bugreports exist for other systems. Don't try to peer or match ports that have connect type set to USB_PORT_NOT_USED. Fixes: 3bfd659baec8 ("usb: find internal hub tier mismatch via acpi") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218465 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218486 Tested-by: Paul Menzel Link: https://lore.kernel.org/linux-usb/5406d361-f5b7-4309-b0e6-8c94408f7d75@molgen.mpg.de Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Mathias Nyman Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218490 Link: https://lore.kernel.org/r/20240222233343.71856-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 38c1a4f4fdeae..17aef216cb501 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -534,7 +534,7 @@ static int match_location(struct usb_device *peer_hdev, void *p) struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); - if (!peer_hub) + if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); @@ -545,7 +545,8 @@ static int match_location(struct usb_device *peer_hdev, void *p) for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; - if (peer && peer->location == port_dev->location) { + if (peer && peer->connect_type != USB_PORT_NOT_USED && + peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } -- GitLab From d40ab69217481d938b3434f5218032f59a58b935 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Tue, 5 Mar 2024 09:57:06 +0800 Subject: [PATCH 1278/1418] tty: serial: fsl_lpuart: avoid idle preamble pending if CTS is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 74cb7e0355fae9641f825afa389d3fba3b617714 upstream. If the remote uart device is not connected or not enabled after booting up, the CTS line is high by default. At this time, if we enable the flow control when opening the device(for example, using “stty -F /dev/ttyLP4 crtscts” command), there will be a pending idle preamble(first writing 0 and then writing 1 to UARTCTRL_TE will queue an idle preamble) that cannot be sent out, resulting in the uart port fail to close(waiting for TX empty), so the user space stty will have to wait for a long time or forever. This is an LPUART IP bug(idle preamble has higher priority than CTS), here add a workaround patch to enable TX CTS after enabling UARTCTRL_TE, so that the idle preamble does not get stuck due to CTS is deasserted. Fixes: 380c966c093e ("tty: serial: fsl_lpuart: add 32-bit register interface support") Cc: stable Signed-off-by: Sherry Sun Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240305015706.1050769-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c5a9b89c4d313..f94c782638686 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2213,9 +2213,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, lpuart32_write(&sport->port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); - lpuart32_write(&sport->port, modem, UARTMODIR); - lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ + lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ + lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* re-enable the CTS if needed */ + lpuart32_write(&sport->port, modem, UARTMODIR); if ((ctrl & (UARTCTRL_PE | UARTCTRL_M)) == UARTCTRL_PE) sport->is_cs7 = true; -- GitLab From 4154e767354140db7804207117e7238fb337b0e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Feb 2024 20:00:35 +0100 Subject: [PATCH 1279/1418] misc: lis3lv02d_i2c: Fix regulators getting en-/dis-abled twice on suspend/resume commit ac3e0384073b2408d6cb0d972fee9fcc3776053d upstream. When not configured for wakeup lis3lv02d_i2c_suspend() will call lis3lv02d_poweroff() even if the device has already been turned off by the runtime-suspend handler and if configured for wakeup and the device is runtime-suspended at this point then it is not turned back on to serve as a wakeup source. Before commit b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback"), lis3lv02d_poweroff() failed to disable the regulators which as a side effect made calling poweroff() twice ok. Now that poweroff() correctly disables the regulators, doing this twice triggers a WARN() in the regulator core: unbalanced disables for regulator-dummy WARNING: CPU: 1 PID: 92 at drivers/regulator/core.c:2999 _regulator_disable ... Fix lis3lv02d_i2c_suspend() to not call poweroff() a second time if already runtime-suspended and add a poweron() call when necessary to make wakeup work. lis3lv02d_i2c_resume() has similar issues, with an added weirness that it always powers on the device if it is runtime suspended, after which the first runtime-resume will call poweron() again, causing the enabled count for the regulator to increase by 1 every suspend/resume. These unbalanced regulator_enable() calls cause the regulator to never be turned off and trigger the following WARN() on driver unbind: WARNING: CPU: 1 PID: 1724 at drivers/regulator/core.c:2396 _regulator_put Fix this by making lis3lv02d_i2c_resume() mirror the new suspend(). Fixes: b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback") Reported-by: Paul Menzel Closes: https://lore.kernel.org/regressions/5fc6da74-af0a-4aac-b4d5-a000b39a63a5@molgen.mpg.de/ Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Signed-off-by: Hans de Goede Tested-by: Paul Menzel # Dell XPS 15 7590 Reviewed-by: Paul Menzel Link: https://lore.kernel.org/r/20240220190035.53402-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index fdec2c30eb165..63c717053e36b 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -199,8 +199,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - if (!lis3->pdata || !lis3->pdata->wakeup_flags) + /* Turn on for wakeup if turned off by runtime suspend */ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweron(lis3); + /* For non wakeup turn off if not already turned off by runtime suspend */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweroff(lis3); + return 0; } @@ -209,13 +215,12 @@ static int lis3lv02d_i2c_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - /* - * pm_runtime documentation says that devices should always - * be powered on at resume. Pm_runtime turns them off after system - * wide resume is complete. - */ - if (!lis3->pdata || !lis3->pdata->wakeup_flags || - pm_runtime_suspended(dev)) + /* Turn back off if turned on for wakeup and runtime suspended*/ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweroff(lis3); + /* For non wakeup turn back on if not runtime suspended */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweron(lis3); return 0; -- GitLab From 2d56aca54847770fe847a0cf31df031a6f9e66c9 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 11 Feb 2024 12:39:11 +0200 Subject: [PATCH 1280/1418] mei: me: add arrow lake point S DID commit 7a9b9012043e126f6d6f4683e67409312d1b707b upstream. Add Arrow Lake S device id. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240211103912.117105-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bdc65d50b945f..fd598f4ec774a 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -112,6 +112,7 @@ #define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ +#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 5bf0d50d55a00..d2ada09019c90 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -119,6 +119,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } -- GitLab From 99cd2d491d1c5b72b6430925f4da9ba8ed326463 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 11 Feb 2024 12:39:12 +0200 Subject: [PATCH 1281/1418] mei: me: add arrow lake point H DID commit 8436f25802ec028ac7254990893f3e01926d9b79 upstream. Add Arrow Lake H device id. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240211103912.117105-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index fd598f4ec774a..3390ff5111033 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -113,6 +113,7 @@ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ #define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ +#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d2ada09019c90..f8219cbd2c7ce 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -120,6 +120,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } -- GitLab From 994a1e583c0c206c8ca7d03334a65b79f4d8bc51 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 29 Feb 2024 17:15:27 -0500 Subject: [PATCH 1282/1418] vt: fix unicode buffer corruption when deleting characters commit 1581dafaf0d34bc9c428a794a22110d7046d186d upstream. This is the same issue that was fixed for the VGA text buffer in commit 39cdb68c64d8 ("vt: fix memory overlapping when deleting chars in the buffer"). The cure is also the same i.e. replace memcpy() with memmove() due to the overlaping buffers. Signed-off-by: Nicolas Pitre Fixes: 81732c3b2fed ("tty vt: Fix line garbage in virtual console on command line edition") Cc: stable Link: https://lore.kernel.org/r/sn184on2-3p0q-0qrq-0218-895349s4753o@syhkavp.arg Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9e30ef2b6eb8c..48a9ed7c93c97 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -398,7 +398,7 @@ static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) char32_t *ln = uniscr->lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; - memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); + memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } -- GitLab From 18d5fc3c16cc317bd0e5f5dabe0660df415cadb7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 4 Mar 2024 15:57:15 -0800 Subject: [PATCH 1283/1418] fs/aio: Check IOCB_AIO_RW before the struct aio_kiocb conversion commit 961ebd120565cb60cebe21cb634fbc456022db4a upstream. The first kiocb_set_cancel_fn() argument may point at a struct kiocb that is not embedded inside struct aio_kiocb. With the current code, depending on the compiler, the req->ki_ctx read happens either before the IOCB_AIO_RW test or after that test. Move the req->ki_ctx read such that it is guaranteed that the IOCB_AIO_RW test happens first. Reported-by: Eric Biggers Cc: Benjamin LaHaise Cc: Eric Biggers Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Fixes: b820de741ae4 ("fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240304235715.3790858-1-bvanassche@acm.org Reviewed-by: Jens Axboe Reviewed-by: Eric Biggers Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 849c3e3ed558b..3e3bf6fdc5ab6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -591,8 +591,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); - struct kioctx *ctx = req->ki_ctx; + struct aio_kiocb *req; + struct kioctx *ctx; unsigned long flags; /* @@ -602,9 +602,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) if (!(iocb->ki_flags & IOCB_AIO_RW)) return; + req = container_of(iocb, struct aio_kiocb, rw); + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; + ctx = req->ki_ctx; + spin_lock_irqsave(&ctx->ctx_lock, flags); list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; -- GitLab From 635e23035dd497fc408eea6fa5b0c22cc035d503 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 1 Mar 2024 15:04:02 +0800 Subject: [PATCH 1284/1418] ALSA: hda/realtek - Add Headset Mic supported Acer NB platform commit 34ab5bbc6e82214d7f7393eba26d164b303ebb4e upstream. It will be enable headset Mic for Acer NB platform. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/fe0eb6661ca240f3b7762b5b3257710d@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0b58536fc15bb..5e8acb96ab2b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10750,6 +10750,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { * at most one tbl is allowed to define for the same vendor and same codec */ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1025, "Acer", ALC2XX_FIXUP_HEADSET_MIC, + {0x19, 0x40000000}), SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1b, 0x40000000}), -- GitLab From 2cfa00b0e87f46c8a9b3087beabc18b3c0606984 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Mon, 4 Mar 2024 21:40:32 +0800 Subject: [PATCH 1285/1418] ALSA: hda/realtek: fix mute/micmute LEDs for HP EliteBook commit a17bd44c0146b00fcaa692915789c16bd1fb2a81 upstream. The HP EliteBook using ALC236 codec which using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240304134033.773348-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5e8acb96ab2b4..fb12034d464ee 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9792,6 +9792,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c90, "HP EliteBook 640", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), -- GitLab From e5b5948c769aa1ebf962dddfb972f87d8f166f95 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 1 Mar 2024 20:07:31 +0530 Subject: [PATCH 1286/1418] tee: optee: Fix kernel panic caused by incorrect error handling commit 95915ba4b987cf2b222b0f251280228a1ff977ac upstream. The error path while failing to register devices on the TEE bus has a bug leading to kernel panic as follows: [ 15.398930] Unable to handle kernel paging request at virtual address ffff07ed00626d7c [ 15.406913] Mem abort info: [ 15.409722] ESR = 0x0000000096000005 [ 15.413490] EC = 0x25: DABT (current EL), IL = 32 bits [ 15.418814] SET = 0, FnV = 0 [ 15.421878] EA = 0, S1PTW = 0 [ 15.425031] FSC = 0x05: level 1 translation fault [ 15.429922] Data abort info: [ 15.432813] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 15.438310] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 15.443372] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 15.448697] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000d9e3e000 [ 15.455413] [ffff07ed00626d7c] pgd=1800000bffdf9003, p4d=1800000bffdf9003, pud=0000000000000000 [ 15.464146] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Commit 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") lead to the introduction of this bug. So fix it appropriately. Reported-by: Mikko Rapeli Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218542 Fixes: 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Signed-off-by: Jens Wiklander Signed-off-by: Greg Kroah-Hartman --- drivers/tee/optee/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 4b10921276942..1892e49a8e6a6 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) -- GitLab From 155a3d8d8f77d0aa969015336c4747554305704e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 21 Feb 2024 12:43:58 +0100 Subject: [PATCH 1287/1418] mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations commit 803de9000f334b771afacb6ff3e78622916668b0 upstream. Sven reports an infinite loop in __alloc_pages_slowpath() for costly order __GFP_RETRY_MAYFAIL allocations that are also GFP_NOIO. Such combination can happen in a suspend/resume context where a GFP_KERNEL allocation can have __GFP_IO masked out via gfp_allowed_mask. Quoting Sven: 1. try to do a "costly" allocation (order > PAGE_ALLOC_COSTLY_ORDER) with __GFP_RETRY_MAYFAIL set. 2. page alloc's __alloc_pages_slowpath tries to get a page from the freelist. This fails because there is nothing free of that costly order. 3. page alloc tries to reclaim by calling __alloc_pages_direct_reclaim, which bails out because a zone is ready to be compacted; it pretends to have made a single page of progress. 4. page alloc tries to compact, but this always bails out early because __GFP_IO is not set (it's not passed by the snd allocator, and even if it were, we are suspending so the __GFP_IO flag would be cleared anyway). 5. page alloc believes reclaim progress was made (because of the pretense in item 3) and so it checks whether it should retry compaction. The compaction retry logic thinks it should try again, because: a) reclaim is needed because of the early bail-out in item 4 b) a zonelist is suitable for compaction 6. goto 2. indefinite stall. (end quote) The immediate root cause is confusing the COMPACT_SKIPPED returned from __alloc_pages_direct_compact() (step 4) due to lack of __GFP_IO to be indicating a lack of order-0 pages, and in step 5 evaluating that in should_compact_retry() as a reason to retry, before incrementing and limiting the number of retries. There are however other places that wrongly assume that compaction can happen while we lack __GFP_IO. To fix this, introduce gfp_compaction_allowed() to abstract the __GFP_IO evaluation and switch the open-coded test in try_to_compact_pages() to use it. Also use the new helper in: - compaction_ready(), which will make reclaim not bail out in step 3, so there's at least one attempt to actually reclaim, even if chances are small for a costly order - in_reclaim_compaction() which will make should_continue_reclaim() return false and we don't over-reclaim unnecessarily - in __alloc_pages_slowpath() to set a local variable can_compact, which is then used to avoid retrying reclaim/compaction for costly allocations (step 5) if we can't compact and also to skip the early compaction attempt that we do in some cases Link: https://lkml.kernel.org/r/20240221114357.13655-2-vbabka@suse.cz Fixes: 3250845d0526 ("Revert "mm, oom: prevent premature OOM killer invocation for high order request"") Signed-off-by: Vlastimil Babka Reported-by: Sven van Ashbrook Closes: https://lore.kernel.org/all/CAG-rBihs_xMKb3wrMO1%2B-%2Bp4fowP9oy1pa_OTkfxBzPUVOZF%2Bg@mail.gmail.com/ Tested-by: Karthikeyan Ramasubramanian Cc: Brian Geffon Cc: Curtis Malainey Cc: Jaroslav Kysela Cc: Mel Gorman Cc: Michal Hocko Cc: Takashi Iwai Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/gfp.h | 9 +++++++++ mm/compaction.c | 7 +------ mm/page_alloc.c | 10 ++++++---- mm/vmscan.c | 5 ++++- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 65a78773dccad..e2ccb47c42643 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -341,6 +341,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. + */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_PM_SLEEP diff --git a/mm/compaction.c b/mm/compaction.c index 8238e83385a79..23af5f3b2ccaf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2570,16 +2570,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, struct page **capture) { - int may_perform_io = (__force int)(gfp_mask & __GFP_IO); struct zoneref *z; struct zone *zone; enum compact_result rc = COMPACT_SKIPPED; - /* - * Check if the GFP flags allow compaction - GFP_NOIO is really - * tricky context because the migration might require IO - */ - if (!may_perform_io) + if (!gfp_compaction_allowed(gfp_mask)) return COMPACT_SKIPPED; trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c783806eefc9f..a7537da43bd45 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5012,6 +5012,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; + bool can_compact = gfp_compaction_allowed(gfp_mask); const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER; struct page *page = NULL; unsigned int alloc_flags; @@ -5090,7 +5091,7 @@ restart: * Don't try this for allocations that are allowed to ignore * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. */ - if (can_direct_reclaim && + if (can_direct_reclaim && can_compact && (costly_order || (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) && !gfp_pfmemalloc_allowed(gfp_mask)) { @@ -5188,9 +5189,10 @@ retry: /* * Do not retry costly high order allocations unless they are - * __GFP_RETRY_MAYFAIL + * __GFP_RETRY_MAYFAIL and we can compact */ - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) + if (costly_order && (!can_compact || + !(gfp_mask & __GFP_RETRY_MAYFAIL))) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -5203,7 +5205,7 @@ retry: * implementation of the compaction depends on the sufficient amount * of free memory (see __compaction_suitable) */ - if (did_some_progress > 0 && + if (did_some_progress > 0 && can_compact && should_compact_retry(ac, order, alloc_flags, compact_result, &compact_priority, &compaction_retries)) diff --git a/mm/vmscan.c b/mm/vmscan.c index 9f3cfb7caa48d..a3b1d8e5dbb3d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6024,7 +6024,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + if (gfp_compaction_allowed(sc->gfp_mask) && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || sc->priority < DEF_PRIORITY - 2)) return true; @@ -6266,6 +6266,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) unsigned long watermark; enum compact_result suitable; + if (!gfp_compaction_allowed(sc->gfp_mask)) + return false; + suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); if (suitable == COMPACT_SUCCESS) /* Allocation should succeed already. Don't reclaim. */ -- GitLab From 5df4c386d35efafdcd2d5d344c749daca649c59b Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 05:36:50 +0200 Subject: [PATCH 1288/1418] iio: accel: adxl367: fix DEVID read after reset commit 1b926914bbe4e30cb32f268893ef7d82a85275b8 upstream. regmap_read_poll_timeout() will not sleep before reading, causing the first read to return -ENXIO on I2C, since the chip does not respond to it while it is being reset. The datasheet specifies that a soft reset operation has a latency of 7.5ms. Add a 15ms sleep between reset and reading the DEVID register, and switch to a simple regmap_read() call. Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207033657.206171-1-demonsingur@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/adxl367.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 7c7d780407937..f1a41b92543af 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1444,9 +1444,11 @@ static int adxl367_verify_devid(struct adxl367_state *st) unsigned int val; int ret; - ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val, - val == ADXL367_DEVID_AD, 1000, 10000); + ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val); if (ret) + return dev_err_probe(st->dev, ret, "Failed to read dev id\n"); + + if (val != ADXL367_DEVID_AD) return dev_err_probe(st->dev, -ENODEV, "Invalid dev id 0x%02X, expected 0x%02X\n", val, ADXL367_DEVID_AD); @@ -1543,6 +1545,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, if (ret) return ret; + fsleep(15000); + ret = adxl367_verify_devid(st); if (ret) return ret; -- GitLab From d50fd2f24f3e6e7f39a45d6060a249c9c0f06583 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 05:36:51 +0200 Subject: [PATCH 1289/1418] iio: accel: adxl367: fix I2C FIFO data register commit 11dadb631007324c7a8bcb2650eda88ed2b9eed0 upstream. As specified in the datasheet, the I2C FIFO data register is 0x18, not 0x42. 0x42 was used by mistake when adapting the ADXL372 driver. Fix this mistake. Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207033657.206171-2-demonsingur@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/adxl367_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c index 3606efa25835e..5c040915276d1 100644 --- a/drivers/iio/accel/adxl367_i2c.c +++ b/drivers/iio/accel/adxl367_i2c.c @@ -11,7 +11,7 @@ #include "adxl367.h" -#define ADXL367_I2C_FIFO_DATA 0x42 +#define ADXL367_I2C_FIFO_DATA 0x18 struct adxl367_i2c_state { struct regmap *regmap; -- GitLab From 43fe5dc9faa09d30aaff587ed1b380019eec9619 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 4 Mar 2024 21:31:06 +0100 Subject: [PATCH 1290/1418] i2c: i801: Avoid potential double call to gpiod_remove_lookup_table commit ceb013b2d9a2946035de5e1827624edc85ae9484 upstream. If registering the platform device fails, the lookup table is removed in the error path. On module removal we would try to remove the lookup table again. Fix this by setting priv->lookup only if registering the platform device was successful. In addition free the memory allocated for the lookup table in the error path. Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing") Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Reviewed-by: Linus Walleij Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9a4e9bf304c28..1c970842624ba 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1422,7 +1422,6 @@ static void i801_add_mux(struct i801_priv *priv) lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip, mux_config->gpios[i], "mux", 0); gpiod_add_lookup_table(lookup); - priv->lookup = lookup; /* * Register the mux device, we use PLATFORM_DEVID_NONE here @@ -1436,7 +1435,10 @@ static void i801_add_mux(struct i801_priv *priv) sizeof(struct i2c_mux_gpio_platform_data)); if (IS_ERR(priv->mux_pdev)) { gpiod_remove_lookup_table(lookup); + devm_kfree(dev, lookup); dev_err(dev, "Failed to register i2c-mux-gpio device\n"); + } else { + priv->lookup = lookup; } } -- GitLab From 7a391de927a7322617a9e1c60cc7d224868b4ec9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Feb 2024 15:59:22 -0500 Subject: [PATCH 1291/1418] drm/amd/display: handle range offsets in VRR ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 937844d661354bf142dc1c621396fdab10ecbacc upstream. Need to check the offset bits for values greater than 255. v2: also update amdgpu_dm_connector values. Suggested-by: Mano Ségransan Tested-by: Mano Ségransan Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3203 Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a6c6f286a5988..ff460c9802eb2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10503,14 +10503,23 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (range->flags != 1) continue; - amdgpu_dm_connector->min_vfreq = range->min_vfreq; - amdgpu_dm_connector->max_vfreq = range->max_vfreq; - amdgpu_dm_connector->pixel_clock_mhz = - range->pixel_clock_mhz * 10; - connector->display_info.monitor_range.min_vfreq = range->min_vfreq; connector->display_info.monitor_range.max_vfreq = range->max_vfreq; + if (edid->revision >= 4) { + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ) + connector->display_info.monitor_range.min_vfreq += 255; + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ) + connector->display_info.monitor_range.max_vfreq += 255; + } + + amdgpu_dm_connector->min_vfreq = + connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = + connector->display_info.monitor_range.max_vfreq; + amdgpu_dm_connector->pixel_clock_mhz = + range->pixel_clock_mhz * 10; + break; } -- GitLab From 2149f8a56e2ed345c7a4d022a79f6b8fc53ae926 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 22 Mar 2024 17:03:58 +0200 Subject: [PATCH 1292/1418] x86/efistub: Call mixed mode boot services on the firmware's stack commit cefcd4fe2e3aaf792c14c9e56dab89e3d7a65d02 upstream. Normally, the EFI stub calls into the EFI boot services using the stack that was live when the stub was entered. According to the UEFI spec, this stack needs to be at least 128k in size - this might seem large but all asynchronous processing and event handling in EFI runs from the same stack and so quite a lot of space may be used in practice. In mixed mode, the situation is a bit different: the bootloader calls the 32-bit EFI stub entry point, which calls the decompressor's 32-bit entry point, where the boot stack is set up, using a fixed allocation of 16k. This stack is still in use when the EFI stub is started in 64-bit mode, and so all calls back into the EFI firmware will be using the decompressor's limited boot stack. Due to the placement of the boot stack right after the boot heap, any stack overruns have gone unnoticed. However, commit 5c4feadb0011983b ("x86/decompressor: Move global symbol references to C code") moved the definition of the boot heap into C code, and now the boot stack is placed right at the base of BSS, where any overruns will corrupt the end of the .data section. While it would be possible to work around this by increasing the size of the boot stack, doing so would affect all x86 systems, and mixed mode systems are a tiny (and shrinking) fraction of the x86 installed base. So instead, record the firmware stack pointer value when entering from the 32-bit firmware, and switch to this stack every time a EFI boot service call is made. Cc: # v6.1+ Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8232c5b2a9bf5..07873f269b7bd 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -49,6 +49,11 @@ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi + + /* Switch to the firmware's stack */ + movl efi32_boot_sp(%rip), %esp + andl $~7, %esp + #ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx @@ -260,6 +265,9 @@ SYM_FUNC_START_LOCAL(efi32_entry) /* Store firmware IDT descriptor */ sidtl (efi32_boot_idt - 1b)(%ebx) + /* Store firmware stack pointer */ + movl %esp, (efi32_boot_sp - 1b)(%ebx) + /* Store boot arguments */ leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) @@ -324,5 +332,6 @@ SYM_DATA_END(efi32_boot_idt) SYM_DATA_LOCAL(efi32_boot_cs, .word 0) SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_sp, .long 0) SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) -- GitLab From cd1bbca03f3c1d845ce274c0d0a66de8e5929f72 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:21 -0800 Subject: [PATCH 1293/1418] net: tls: handle backlogging of crypto requests commit 8590541473188741055d27b955db0777569438e3 upstream. Since we're setting the CRYPTO_TFM_REQ_MAY_BACKLOG flag on our requests to the crypto API, crypto_aead_{encrypt,decrypt} can return -EBUSY instead of -EINPROGRESS in valid situations. For example, when the cryptd queue for AESNI is full (easy to trigger with an artificially low cryptd.cryptd_max_cpu_qlen), requests will be enqueued to the backlog but still processed. In that case, the async callback will also be called twice: first with err == -EINPROGRESS, which it seems we can just ignore, then with err == 0. Compared to Sabrina's original patch this version uses the new tls_*crypt_async_wait() helpers and converts the EBUSY to EINPROGRESS to avoid having to modify all the error handling paths. The handling is identical. Fixes: a54667f6728c ("tls: Add support for encryption using async offload accelerator") Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/netdev/9681d1febfec295449a62300938ed2ae66983f28.1694018970.git.sd@queasysnail.net/ Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller [Srish: v2: fixed hunk failures fixed merge-conflict in stable branch linux-6.1.y, needs to go on top of https://lore.kernel.org/stable/20240307155930.913525-1-lee@kernel.org/] Signed-off-by: Srish Srinivasan Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d53587ff9ddea..e723584fc644b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -195,6 +195,17 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -268,6 +279,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -451,6 +466,9 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct tls_rec *rec; struct sock *sk; + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; + rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -556,6 +574,10 @@ static int tls_do_encryption(struct sock *sk, atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; -- GitLab From 6ee02e35593193d25f49aed7a97f76ae1ff07a3d Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Wed, 13 Mar 2024 09:58:52 +0800 Subject: [PATCH 1294/1418] ASoC: amd: yc: Revert "Fix non-functional mic on Lenovo 21J2" commit 861b3415e4dee06cc00cd1754808a7827b9105bf upstream. This reverts commit ed00a6945dc32462c2d3744a3518d2316da66fcc, which added a quirk entry to enable the Yellow Carp (YC) driver for the Lenovo 21J2 laptop. Although the microphone functioned with the YC driver, it resulted in incorrect driver usage. The Lenovo 21J2 is not a Yellow Carp platform, but a Pink Sardine platform, which already has an upstreamed driver. The microphone on the Lenovo 21J2 operates correctly with the CONFIG_SND_SOC_AMD_PS flag enabled and does not require the quirk entry. So this patch removes the quirk entry. Thanks to Mukunda Vijendar [1] for pointing this out. Link: https://lore.kernel.org/linux-sound/023092e1-689c-4b00-b93f-4092c3724fb6@amd.com/ [1] Signed-off-by: Jiawei Wang Link: https://lore.kernel.org/linux-sound/023092e1-689c-4b00-b93f-4092c3724fb6@amd.com/ [1] Link: https://msgid.link/r/20240313015853.3573242-2-me@jwang.link Signed-off-by: Mark Brown Cc: Luca Stefani Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index e0f406b6646ba..0568e64d10150 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,13 +199,6 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, - { - .driver_data = &acp6x_card, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21J2"), - } - }, { .driver_data = &acp6x_card, .matches = { -- GitLab From 64c3873cb255d23047843ca3a1b9769c07d7ecef Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Mon, 4 Mar 2024 16:40:50 +0530 Subject: [PATCH 1295/1418] iommu: Avoid races around default domain allocations This fix is applicable for LTS kernel, 6.1.y. In latest kernels, this race issue is fixed by the patch series [1] and [2]. The right thing to do here would have been propagating these changes from latest kernel to the stable branch, 6.1.y. However, these changes seems too intrusive to be picked for stable branches. Hence, the fix proposed can be taken as an alternative instead of backporting the patch series. [1] https://lore.kernel.org/all/0-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com/ [2] https://lore.kernel.org/all/0-v5-1b99ae392328+44574-iommu_err_unwind_jgg@nvidia.com/ Issue: A race condition is observed when arm_smmu_device_probe and modprobe of client devices happens in parallel. This results in the allocation of a new default domain for the iommu group even though it was previously allocated and the respective iova domain(iovad) was initialized. However, for this newly allocated default domain, iovad will not be initialized. As a result, for devices requesting dma allocations, this uninitialized iovad will be used, thereby causing NULL pointer dereference issue. Flow: - During arm_smmu_device_probe, bus_iommu_probe() will be called as part of iommu_device_register(). This results in the device probe, __iommu_probe_device(). - When the modprobe of the client device happens in parallel, it sets up the DMA configuration for the device using of_dma_configure_id(), which inturn calls iommu_probe_device(). Later, default domain is allocated and attached using iommu_alloc_default_domain() and __iommu_attach_device() respectively. It then ends up initializing a mapping domain(IOVA domain) and rcaches for the device via arch_setup_dma_ops()->iommu_setup_dma_ops(). - Now, in the bus_iommu_probe() path, it again tries to allocate a default domain via probe_alloc_default_domain(). This results in allocating a new default domain(along with IOVA domain) via __iommu_domain_alloc(). However, this newly allocated IOVA domain will not be initialized. - Now, when the same client device tries dma allocations via iommu_dma_alloc(), it ends up accessing the rcaches of the newly allocated IOVA domain, which is not initialized. This results into NULL pointer dereferencing. Fix this issue by adding a check in probe_alloc_default_domain() to see if the iommu_group already has a default domain allocated and initialized. Cc: # see patch description, fix applicable only for 6.1.y Signed-off-by: Charan Teja Kalla Co-developed-by: Nikhil V Signed-off-by: Nikhil V Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8b38972394776..83736824f17d1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1741,6 +1741,9 @@ static void probe_alloc_default_domain(struct bus_type *bus, { struct __group_domain_type gtype; + if (group->default_domain) + return; + memset(>ype, 0, sizeof(gtype)); /* Ask for default domain requirements of all devices in the group */ -- GitLab From 936381380ae83563d328ab729251d8bc40581fac Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 18 Feb 2024 18:41:37 +0100 Subject: [PATCH 1296/1418] clocksource/drivers/arm_global_timer: Fix maximum prescaler value [ Upstream commit b34b9547cee41575a4fddf390f615570759dc999 ] The prescaler in the "Global Timer Control Register bit assignments" is documented to use bits [15:8], which means that the maximum prescaler register value is 0xff. Fixes: 171b45a4a70e ("clocksource/drivers/arm_global_timer: Implement rate compensation whenever source clock changes") Signed-off-by: Martin Blumenstingl Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240218174138.1942418-2-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/clocksource/arm_global_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 44a61dc6f9320..e1c773bb55359 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -32,7 +32,7 @@ #define GT_CONTROL_IRQ_ENABLE BIT(2) /* banked */ #define GT_CONTROL_AUTO_INC BIT(3) /* banked */ #define GT_CONTROL_PRESCALER_SHIFT 8 -#define GT_CONTROL_PRESCALER_MAX 0xF +#define GT_CONTROL_PRESCALER_MAX 0xFF #define GT_CONTROL_PRESCALER_MASK (GT_CONTROL_PRESCALER_MAX << \ GT_CONTROL_PRESCALER_SHIFT) -- GitLab From 3e5222a17914f663e56c5ffca15586cf0960b4b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20R=C3=B6sti?= Date: Mon, 11 Mar 2024 21:17:04 +0000 Subject: [PATCH 1297/1418] entry: Respect changes to system call number by trace_sys_enter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fb13b11d53875e28e7fbf0c26b288e4ea676aa9f ] When a probe is registered at the trace_sys_enter() tracepoint, and that probe changes the system call number, the old system call still gets executed. This worked correctly until commit b6ec41346103 ("core/entry: Report syscall correctly for trace and audit"), which removed the re-evaluation of the syscall number after the trace point. Restore the original semantics by re-evaluating the system call number after trace_sys_enter(). The performance impact of this re-evaluation is minimal because it only takes place when a trace point is active, and compared to the actual trace point overhead the read from a cache hot variable is negligible. Fixes: b6ec41346103 ("core/entry: Report syscall correctly for trace and audit") Signed-off-by: André Rösti Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240311211704.7262-1-an.roesti@gmail.com Signed-off-by: Sasha Levin --- kernel/entry/common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index be61332c66b54..ccf2b1e1b40be 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -77,8 +77,14 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, /* Either of the above might have changed the syscall number */ syscall = syscall_get_nr(current, regs); - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { trace_sys_enter(regs, syscall); + /* + * Probes or BPF hooks in the tracepoint may have changed the + * system call number as well. + */ + syscall = syscall_get_nr(current, regs); + } syscall_enter_audit(regs, syscall); -- GitLab From 8d8be62a7d5386fe224c5455361da980ce64a96f Mon Sep 17 00:00:00 2001 From: David Laight Date: Mon, 18 Sep 2023 08:16:30 +0000 Subject: [PATCH 1298/1418] minmax: add umin(a, b) and umax(a, b) [ Upstream commit 80fcac55385ccb710d33a20dc1caaef29bd5a921 ] Patch series "minmax: Relax type checks in min() and max()", v4. The min() (etc) functions in minmax.h require that the arguments have exactly the same types. However when the type check fails, rather than look at the types and fix the type of a variable/constant, everyone seems to jump on min_t(). In reality min_t() ought to be rare - when something unusual is being done, not normality. The orginal min() (added in 2.4.9) replaced several inline functions and included the type - so matched the implicit casting of the function call. This was renamed min_t() in 2.4.10 and the current min() added. There is no actual indication that the conversion of negatve values to large unsigned values has ever been an actual problem. A quick grep shows 5734 min() and 4597 min_t(). Having the casts on almost half of the calls shows that something is clearly wrong. If the wrong type is picked (and it is far too easy to pick the type of the result instead of the larger input) then significant bits can get discarded. Pretty much the worst example is in the derived clamp_val(), consider: unsigned char x = 200u; y = clamp_val(x, 10u, 300u); I also suspect that many of the min_t(u16, ...) are actually wrong. For example copy_data() in printk_ringbuffer.c contains: data_size = min_t(u16, buf_size, len); Here buf_size is 'unsigned int' and len 'u16', pass a 64k buffer (can you prove that doesn't happen?) and no data is returned. Apparantly it did - and has since been fixed. The only reason that most of the min_t() are 'fine' is that pretty much all the values in the kernel are between 0 and INT_MAX. Patch 1 adds umin(), this uses integer promotions to convert both arguments to 'unsigned long long'. It can be used to compare a signed type that is known to contain a non-negative value with an unsigned type. The compiler typically optimises it all away. Added first so that it can be referred to in patch 2. Patch 2 replaces the 'same type' check with a 'same signedness' one. This makes min(unsigned_int_var, sizeof()) be ok. The error message is also improved and will contain the expanded form of both arguments (useful for seeing how constants are defined). Patch 3 just fixes some whitespace. Patch 4 allows comparisons of 'unsigned char' and 'unsigned short' to signed types. The integer promotion rules convert them both to 'signed int' prior to the comparison so they can never cause a negative value be converted to a large positive one. Patch 5 (rewritted for v4) allows comparisons of unsigned values against non-negative constant integer expressions. This makes min(unsigned_int_var, 4) be ok. The only common case that is still errored is the comparison of signed values against unsigned constant integer expressions below __INT_MAX__. Typcally min(int_val, sizeof (foo)), the real fix for this is casting the constant: min(int_var, (int)sizeof (foo)). With all the patches applied pretty much all the min_t() could be replaced by min(), and most of the rest by umin(). However they all need careful inspection due to code like: sz = min_t(unsigned char, sz - 1, LIM - 1) + 1; which converts 0 to LIM. This patch (of 6): umin() and umax() can be used when min()/max() errors a signed v unsigned compare when the signed value is known to be non-negative. Unlike min_t(some_unsigned_type, a, b) umin() will never mask off high bits if an inappropriate type is selected. The '+ 0u + 0ul + 0ull' may look strange. The '+ 0u' is needed for 'signed int' on 64bit systems. The '+ 0ul' is needed for 'signed long' on 32bit systems. The '+ 0ull' is needed for 'signed long long'. Link: https://lkml.kernel.org/r/b97faef60ad24922b530241c5d7c933c@AcuMS.aculab.com Link: https://lkml.kernel.org/r/41d93ca827a248698ec64bf57e0c05a5@AcuMS.aculab.com Signed-off-by: David Laight Cc: Andy Shevchenko Cc: Christoph Hellwig Cc: Jason A. Donenfeld Cc: Linus Torvalds Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 51b30ecb73b4 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present") Signed-off-by: Sasha Levin --- include/linux/minmax.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 5433c08fcc685..1aea34b8f19bf 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -51,6 +51,23 @@ */ #define max(x, y) __careful_cmp(x, y, >) +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, <) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, >) + /** * min3 - return minimum of three values * @x: first value -- GitLab From ef80ecc721274c0602719abe822d98ec7e6073fe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Mar 2024 15:28:27 +0000 Subject: [PATCH 1299/1418] swiotlb: Fix alignment checks when both allocation and DMA masks are present [ Upstream commit 51b30ecb73b481d5fac6ccf2ecb4a309c9ee3310 ] Nicolin reports that swiotlb buffer allocations fail for an NVME device behind an IOMMU using 64KiB pages. This is because we end up with a minimum allocation alignment of 64KiB (for the IOMMU to map the buffer safely) but a minimum DMA alignment mask corresponding to a 4KiB NVME page (i.e. preserving the 4KiB page offset from the original allocation). If the original address is not 4KiB-aligned, the allocation will fail because swiotlb_search_pool_area() erroneously compares these unmasked bits with the 64KiB-aligned candidate allocation. Tweak swiotlb_search_pool_area() so that the DMA alignment mask is reduced based on the required alignment of the allocation. Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers") Link: https://lore.kernel.org/r/cover.1707851466.git.nicolinc@nvidia.com Reported-by: Nicolin Chen Signed-off-by: Will Deacon Reviewed-by: Michael Kelley Tested-by: Nicolin Chen Tested-by: Michael Kelley Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/swiotlb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ad6333c3fe1ff..db89ac94e7db4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -654,8 +654,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; unsigned int offset = swiotlb_align_offset(dev, orig_addr); @@ -666,6 +665,14 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, BUG_ON(!nslots); BUG_ON(area_index >= mem->nareas); + /* + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. + */ + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; + /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. For allocations of -- GitLab From d7f62fde4191b912f68ff958fb333237869c2cdb Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 8 Mar 2024 15:28:28 +0000 Subject: [PATCH 1300/1418] iommu/dma: Force swiotlb_max_mapping_size on an untrusted device [ Upstream commit afc5aa46ed560f01ceda897c053c6a40c77ce5c4 ] The swiotlb does not support a mapping size > swiotlb_max_mapping_size(). On the other hand, with a 64KB PAGE_SIZE configuration, it's observed that an NVME device can map a size between 300KB~512KB, which certainly failed the swiotlb mappings, though the default pool of swiotlb has many slots: systemd[1]: Started Journal Service. => nvme 0000:00:01.0: swiotlb buffer is full (sz: 327680 bytes), total 32768 (slots), used 32 (slots) note: journal-offline[392] exited with irqs disabled note: journal-offline[392] exited with preempt_count 1 Call trace: [ 3.099918] swiotlb_tbl_map_single+0x214/0x240 [ 3.099921] iommu_dma_map_page+0x218/0x328 [ 3.099928] dma_map_page_attrs+0x2e8/0x3a0 [ 3.101985] nvme_prep_rq.part.0+0x408/0x878 [nvme] [ 3.102308] nvme_queue_rqs+0xc0/0x300 [nvme] [ 3.102313] blk_mq_flush_plug_list.part.0+0x57c/0x600 [ 3.102321] blk_add_rq_to_plug+0x180/0x2a0 [ 3.102323] blk_mq_submit_bio+0x4c8/0x6b8 [ 3.103463] __submit_bio+0x44/0x220 [ 3.103468] submit_bio_noacct_nocheck+0x2b8/0x360 [ 3.103470] submit_bio_noacct+0x180/0x6c8 [ 3.103471] submit_bio+0x34/0x130 [ 3.103473] ext4_bio_write_folio+0x5a4/0x8c8 [ 3.104766] mpage_submit_folio+0xa0/0x100 [ 3.104769] mpage_map_and_submit_buffers+0x1a4/0x400 [ 3.104771] ext4_do_writepages+0x6a0/0xd78 [ 3.105615] ext4_writepages+0x80/0x118 [ 3.105616] do_writepages+0x90/0x1e8 [ 3.105619] filemap_fdatawrite_wbc+0x94/0xe0 [ 3.105622] __filemap_fdatawrite_range+0x68/0xb8 [ 3.106656] file_write_and_wait_range+0x84/0x120 [ 3.106658] ext4_sync_file+0x7c/0x4c0 [ 3.106660] vfs_fsync_range+0x3c/0xa8 [ 3.106663] do_fsync+0x44/0xc0 Since untrusted devices might go down the swiotlb pathway with dma-iommu, these devices should not map a size larger than swiotlb_max_mapping_size. To fix this bug, add iommu_dma_max_mapping_size() for untrusted devices to take into account swiotlb_max_mapping_size() v.s. iova_rcache_range() from the iommu_dma_opt_mapping_size(). Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers") Link: https://lore.kernel.org/r/ee51a3a5c32cf885b18f6416171802669f4a718a.1707851466.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen [will: Drop redundant is_swiotlb_active(dev) check] Signed-off-by: Will Deacon Reviewed-by: Michael Kelley Acked-by: Robin Murphy Tested-by: Nicolin Chen Tested-by: Michael Kelley Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/iommu/dma-iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index aa6d62cc567ae..3fa66dba0a326 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1547,6 +1547,14 @@ static size_t iommu_dma_opt_mapping_size(void) return iova_rcache_range(); } +static size_t iommu_dma_max_mapping_size(struct device *dev) +{ + if (dev_is_untrusted(dev)) + return swiotlb_max_mapping_size(dev); + + return SIZE_MAX; +} + static const struct dma_map_ops iommu_dma_ops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, @@ -1569,6 +1577,7 @@ static const struct dma_map_ops iommu_dma_ops = { .unmap_resource = iommu_dma_unmap_resource, .get_merge_boundary = iommu_dma_get_merge_boundary, .opt_mapping_size = iommu_dma_opt_mapping_size, + .max_mapping_size = iommu_dma_max_mapping_size, }; /* -- GitLab From 2f67c82ed04ee43241b3cc94516fcad9a4c21cf4 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 26 Feb 2024 13:07:24 +0106 Subject: [PATCH 1301/1418] printk: Update @console_may_schedule in console_trylock_spinning() [ Upstream commit 8076972468584d4a21dab9aa50e388b3ea9ad8c7 ] console_trylock_spinning() may takeover the console lock from a schedulable context. Update @console_may_schedule to make sure it reflects a trylock acquire. Reported-by: Mukesh Ojha Closes: https://lore.kernel.org/lkml/20240222090538.23017-1-quic_mojha@quicinc.com Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") Signed-off-by: John Ogness Reviewed-by: Mukesh Ojha Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/875xybmo2z.fsf@jogness.linutronix.de Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c55ee859dbd08..0ae06d5046bb0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1947,6 +1947,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } -- GitLab From 0250a68a35c95843174c1403882b3dc12c98a28c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:16 +0200 Subject: [PATCH 1302/1418] irqchip/renesas-rzg2l: Implement restriction when writing ISCR register [ Upstream commit ef88eefb1a81a8701eabb7d5ced761a66a465a49 ] The RZ/G2L manual (chapter "IRQ Status Control Register (ISCR)") describes the operation to clear interrupts through the ISCR register as follows: [Write operation] When "Falling-edge detection", "Rising-edge detection" or "Falling/Rising-edge detection" is set in IITSR: - In case ISTAT is 1 0: IRQn interrupt detection status is cleared. 1: Invalid to write. - In case ISTAT is 0 Invalid to write. When "Low-level detection" is set in IITSR.: Invalid to write. Take the interrupt type into account when clearing interrupts through the ISCR register to avoid writing the ISCR when the interrupt type is level. Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20231120111820.87398-6-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: 9eec61df55c5 ("irqchip/renesas-rzg2l: Flush posted write in irq_eoi()") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 10c3e85c90c23..fbd1766f6aaa5 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -72,11 +72,17 @@ static void rzg2l_irq_eoi(struct irq_data *d) unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); u32 bit = BIT(hw_irq); - u32 reg; + u32 iitsr, iscr; - reg = readl_relaxed(priv->base + ISCR); - if (reg & bit) - writel_relaxed(reg & ~bit, priv->base + ISCR); + iscr = readl_relaxed(priv->base + ISCR); + iitsr = readl_relaxed(priv->base + IITSR); + + /* + * ISCR can only be cleared if the type is falling-edge, rising-edge or + * falling/rising-edge. + */ + if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) + writel_relaxed(iscr & ~bit, priv->base + ISCR); } static void rzg2l_tint_eoi(struct irq_data *d) -- GitLab From 7775e87f503a9867b22c3b04392075b0262e1544 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:18 +0000 Subject: [PATCH 1303/1418] irqchip/renesas-rzg2l: Flush posted write in irq_eoi() [ Upstream commit 9eec61df55c51415409c7cc47e9a1c8de94a0522 ] The irq_eoi() callback of the RZ/G2L interrupt chip clears the relevant interrupt cause bit in the TSCR register by writing to it. This write is not sufficient because the write is posted and therefore not guaranteed to immediately clear the bit. Due to that delay the CPU can raise the just handled interrupt again. Prevent this by reading the register back which causes the posted write to be flushed to the hardware before the read completes. Fixes: 3fed09559cd8 ("irqchip: Add RZ/G2L IA55 Interrupt Controller driver") Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index fbd1766f6aaa5..454af6faf42bc 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -81,8 +81,14 @@ static void rzg2l_irq_eoi(struct irq_data *d) * ISCR can only be cleared if the type is falling-edge, rising-edge or * falling/rising-edge. */ - if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) + if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) { writel_relaxed(iscr & ~bit, priv->base + ISCR); + /* + * Enforce that the posted write is flushed to prevent that the + * just handled interrupt is raised again. + */ + readl_relaxed(priv->base + ISCR); + } } static void rzg2l_tint_eoi(struct irq_data *d) @@ -93,8 +99,14 @@ static void rzg2l_tint_eoi(struct irq_data *d) u32 reg; reg = readl_relaxed(priv->base + TSCR); - if (reg & bit) + if (reg & bit) { writel_relaxed(reg & ~bit, priv->base + TSCR); + /* + * Enforce that the posted write is flushed to prevent that the + * just handled interrupt is raised again. + */ + readl_relaxed(priv->base + TSCR); + } } static void rzg2l_irqc_eoi(struct irq_data *d) -- GitLab From 5dc2cb3eeeeeefca9e8c28115cb732b6b4d446a7 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:17 +0200 Subject: [PATCH 1304/1418] irqchip/renesas-rzg2l: Add macro to retrieve TITSR register offset based on register's index [ Upstream commit 2eca4731cc66563b3919d8753dbd74d18c39f662 ] There are 2 TITSR registers available on the IA55 interrupt controller. Add a macro that retrieves the TITSR register offset based on it's index. This macro is useful in when adding suspend/resume support so both TITSR registers can be accessed in a for loop. Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231120111820.87398-7-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: 853a6030303f ("irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 454af6faf42bc..a74391615ab38 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -28,8 +28,7 @@ #define ISCR 0x10 #define IITSR 0x14 #define TSCR 0x20 -#define TITSR0 0x24 -#define TITSR1 0x28 +#define TITSR(n) (0x24 + (n) * 4) #define TITSR0_MAX_INT 16 #define TITSEL_WIDTH 0x2 #define TSSR(n) (0x30 + ((n) * 4)) @@ -206,8 +205,7 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); unsigned int hwirq = irqd_to_hwirq(d); u32 titseln = hwirq - IRQC_TINT_START; - u32 offset; - u8 sense; + u8 index, sense; u32 reg; switch (type & IRQ_TYPE_SENSE_MASK) { @@ -223,17 +221,17 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) return -EINVAL; } - offset = TITSR0; + index = 0; if (titseln >= TITSR0_MAX_INT) { titseln -= TITSR0_MAX_INT; - offset = TITSR1; + index = 1; } raw_spin_lock(&priv->lock); - reg = readl_relaxed(priv->base + offset); + reg = readl_relaxed(priv->base + TITSR(index)); reg &= ~(IRQ_MASK << (titseln * TITSEL_WIDTH)); reg |= sense << (titseln * TITSEL_WIDTH); - writel_relaxed(reg, priv->base + offset); + writel_relaxed(reg, priv->base + TITSR(index)); raw_spin_unlock(&priv->lock); return 0; -- GitLab From 551ad70e86621587cd7cec4351d55a37f0eb50d4 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:19 +0000 Subject: [PATCH 1305/1418] irqchip/renesas-rzg2l: Rename rzg2l_tint_eoi() [ Upstream commit 7cb6362c63df233172eaecddaf9ce2ce2f769112 ] Rename rzg2l_tint_eoi()->rzg2l_clear_tint_int() and simplify the code by removing redundant priv and hw_irq local variables. Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Stable-dep-of: 853a6030303f ("irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index a74391615ab38..ced5064eb0e9d 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -90,11 +90,9 @@ static void rzg2l_irq_eoi(struct irq_data *d) } } -static void rzg2l_tint_eoi(struct irq_data *d) +static void rzg2l_clear_tint_int(struct rzg2l_irqc_priv *priv, unsigned int hwirq) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_TINT_START; - struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); - u32 bit = BIT(hw_irq); + u32 bit = BIT(hwirq - IRQC_TINT_START); u32 reg; reg = readl_relaxed(priv->base + TSCR); @@ -117,7 +115,7 @@ static void rzg2l_irqc_eoi(struct irq_data *d) if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT) rzg2l_irq_eoi(d); else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) - rzg2l_tint_eoi(d); + rzg2l_clear_tint_int(priv, hw_irq); raw_spin_unlock(&priv->lock); irq_chip_eoi_parent(d); } -- GitLab From 573777eb56071631b5310348898ad3a1f486e8e0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:20 +0000 Subject: [PATCH 1306/1418] irqchip/renesas-rzg2l: Rename rzg2l_irq_eoi() [ Upstream commit b4b5cd61a6fdd92ede0dc39f0850a182affd1323 ] Rename rzg2l_irq_eoi()->rzg2l_clear_irq_int() and simplify the code by removing redundant priv local variable. Suggested-by: Geert Uytterhoeven Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Stable-dep-of: 853a6030303f ("irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index ced5064eb0e9d..aa27335ab2eec 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -66,10 +66,9 @@ static struct rzg2l_irqc_priv *irq_data_to_priv(struct irq_data *data) return data->domain->host_data; } -static void rzg2l_irq_eoi(struct irq_data *d) +static void rzg2l_clear_irq_int(struct rzg2l_irqc_priv *priv, unsigned int hwirq) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; - struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); + unsigned int hw_irq = hwirq - IRQC_IRQ_START; u32 bit = BIT(hw_irq); u32 iitsr, iscr; @@ -113,7 +112,7 @@ static void rzg2l_irqc_eoi(struct irq_data *d) raw_spin_lock(&priv->lock); if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT) - rzg2l_irq_eoi(d); + rzg2l_clear_irq_int(priv, hw_irq); else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) rzg2l_clear_tint_int(priv, hw_irq); raw_spin_unlock(&priv->lock); -- GitLab From dc33e92af306ab8407aab843c85d30384b37054b Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:21 +0000 Subject: [PATCH 1307/1418] irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type [ Upstream commit 853a6030303f8a8fa54929b68e5665d9b21aa405 ] RZ/G2L interrupt chips require that the interrupt is masked before changing the NMI, IRQ, TINT interrupt settings. Aside of that, after setting an edge trigger type it is required to clear the interrupt status register in order to avoid spurious interrupts. The current implementation fails to do either of that and therefore is prone to generate spurious interrupts when setting the trigger type. Address this by: - Ensuring that the interrupt is masked at the chip level across the update for the TINT chip - Clearing the interrupt status register after updating the trigger mode for edge type interrupts [ tglx: Massaged changelog and reverted the spin_lock_irqsave() change as the set_type() callback is always called with interrupts disabled. ] Fixes: 3fed09559cd8 ("irqchip: Add RZ/G2L IA55 Interrupt Controller driver") Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 36 +++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index aa27335ab2eec..be71459c7465a 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -162,8 +162,10 @@ static void rzg2l_irqc_irq_enable(struct irq_data *d) static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); + unsigned int hwirq = irqd_to_hwirq(d); + u32 iitseln = hwirq - IRQC_IRQ_START; + bool clear_irq_int = false; u16 sense, tmp; switch (type & IRQ_TYPE_SENSE_MASK) { @@ -173,14 +175,17 @@ static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) case IRQ_TYPE_EDGE_FALLING: sense = IITSR_IITSEL_EDGE_FALLING; + clear_irq_int = true; break; case IRQ_TYPE_EDGE_RISING: sense = IITSR_IITSEL_EDGE_RISING; + clear_irq_int = true; break; case IRQ_TYPE_EDGE_BOTH: sense = IITSR_IITSEL_EDGE_BOTH; + clear_irq_int = true; break; default: @@ -189,21 +194,40 @@ static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) raw_spin_lock(&priv->lock); tmp = readl_relaxed(priv->base + IITSR); - tmp &= ~IITSR_IITSEL_MASK(hw_irq); - tmp |= IITSR_IITSEL(hw_irq, sense); + tmp &= ~IITSR_IITSEL_MASK(iitseln); + tmp |= IITSR_IITSEL(iitseln, sense); + if (clear_irq_int) + rzg2l_clear_irq_int(priv, hwirq); writel_relaxed(tmp, priv->base + IITSR); raw_spin_unlock(&priv->lock); return 0; } +static u32 rzg2l_disable_tint_and_set_tint_source(struct irq_data *d, struct rzg2l_irqc_priv *priv, + u32 reg, u32 tssr_offset, u8 tssr_index) +{ + u32 tint = (u32)(uintptr_t)irq_data_get_irq_chip_data(d); + u32 tien = reg & (TIEN << TSSEL_SHIFT(tssr_offset)); + + /* Clear the relevant byte in reg */ + reg &= ~(TSSEL_MASK << TSSEL_SHIFT(tssr_offset)); + /* Set TINT and leave TIEN clear */ + reg |= tint << TSSEL_SHIFT(tssr_offset); + writel_relaxed(reg, priv->base + TSSR(tssr_index)); + + return reg | tien; +} + static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) { struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); unsigned int hwirq = irqd_to_hwirq(d); u32 titseln = hwirq - IRQC_TINT_START; + u32 tssr_offset = TSSR_OFFSET(titseln); + u8 tssr_index = TSSR_INDEX(titseln); u8 index, sense; - u32 reg; + u32 reg, tssr; switch (type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_RISING: @@ -225,10 +249,14 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) } raw_spin_lock(&priv->lock); + tssr = readl_relaxed(priv->base + TSSR(tssr_index)); + tssr = rzg2l_disable_tint_and_set_tint_source(d, priv, tssr, tssr_offset, tssr_index); reg = readl_relaxed(priv->base + TITSR(index)); reg &= ~(IRQ_MASK << (titseln * TITSEL_WIDTH)); reg |= sense << (titseln * TITSEL_WIDTH); writel_relaxed(reg, priv->base + TITSR(index)); + rzg2l_clear_tint_int(priv, hwirq); + writel_relaxed(tssr, priv->base + TSSR(tssr_index)); raw_spin_unlock(&priv->lock); return 0; -- GitLab From 6417684315087904fffe8966d27ca74398c57dd6 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 15 Mar 2024 00:17:30 +0900 Subject: [PATCH 1308/1418] kprobes/x86: Use copy_from_kernel_nofault() to read from unsafe address [ Upstream commit 4e51653d5d871f40f1bd5cf95cc7f2d8b33d063b ] Read from an unsafe address with copy_from_kernel_nofault() in arch_adjust_kprobe_addr() because this function is used before checking the address is in text or not. Syzcaller bot found a bug and reported the case if user specifies inaccessible data area, arch_adjust_kprobe_addr() will cause a kernel panic. [ mingo: Clarified the comment. ] Fixes: cc66bb914578 ("x86/ibt,kprobes: Cure sym+0 equals fentry woes") Reported-by: Qiang Zhang Tested-by: Jinghao Jia Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/171042945004.154897.2221804961882915806.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/x86/kernel/kprobes/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6120f25b0d5cc..991f00c817e6c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -301,7 +301,16 @@ static int can_probe(unsigned long paddr) kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + u32 insn; + + /* + * Since 'addr' is not guaranteed to be safe to access, use + * copy_from_kernel_nofault() to read the instruction: + */ + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) + return NULL; + + if (is_endbr(insn)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; -- GitLab From 493ed133b4e444f4cc31f34e44264fc2a042a595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?KONDO=20KAZUMA=28=E8=BF=91=E8=97=A4=E3=80=80=E5=92=8C?= =?UTF-8?q?=E7=9C=9F=29?= Date: Fri, 22 Mar 2024 10:47:02 +0000 Subject: [PATCH 1309/1418] efi/libstub: fix efi_random_alloc() to allocate memory at alloc_min or higher address [ Upstream commit 3cb4a4827596abc82e55b80364f509d0fefc3051 ] Following warning is sometimes observed while booting my servers: [ 3.594838] DMA: preallocated 4096 KiB GFP_KERNEL pool for atomic allocations [ 3.602918] swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0-1 ... [ 3.851862] DMA: preallocated 1024 KiB GFP_KERNEL|GFP_DMA pool for atomic allocation If 'nokaslr' boot option is set, the warning always happens. On x86, ZONE_DMA is small zone at the first 16MB of physical address space. When this problem happens, most of that space seems to be used by decompressed kernel. Thereby, there is not enough space at DMA_ZONE to meet the request of DMA pool allocation. The commit 2f77465b05b1 ("x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR") tried to fix this problem by introducing lower bound of allocation. But the fix is not complete. efi_random_alloc() allocates pages by following steps. 1. Count total available slots ('total_slots') 2. Select a slot ('target_slot') to allocate randomly 3. Calculate a starting address ('target') to be included target_slot 4. Allocate pages, which starting address is 'target' In step 1, 'alloc_min' is used to offset the starting address of memory chunk. But in step 3 'alloc_min' is not considered at all. As the result, 'target' can be miscalculated and become lower than 'alloc_min'. When KASLR is disabled, 'target_slot' is always 0 and the problem happens everytime if the EFI memory map of the system meets the condition. Fix this problem by calculating 'target' considering 'alloc_min'. Cc: linux-efi@vger.kernel.org Cc: Tom Englund Cc: linux-kernel@vger.kernel.org Fixes: 2f77465b05b1 ("x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR") Signed-off-by: Kazuma Kondo Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/randomalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 7ba05719a53ba..0d7b11b55ff31 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -119,7 +119,7 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(md->phys_addr, align) + target_slot * align; + target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, -- GitLab From 21c7c00dae55cb0e3810d5f9506b58f68475d41d Mon Sep 17 00:00:00 2001 From: Adamos Ttofari Date: Fri, 22 Mar 2024 16:04:39 -0700 Subject: [PATCH 1310/1418] x86/fpu: Keep xfd_state in sync with MSR_IA32_XFD [ Upstream commit 10e4b5166df9ff7a2d5316138ca668b42d004422 ] Commit 672365477ae8 ("x86/fpu: Update XFD state where required") and commit 8bf26758ca96 ("x86/fpu: Add XFD state to fpstate") introduced a per CPU variable xfd_state to keep the MSR_IA32_XFD value cached, in order to avoid unnecessary writes to the MSR. On CPU hotplug MSR_IA32_XFD is reset to the init_fpstate.xfd, which wipes out any stale state. But the per CPU cached xfd value is not reset, which brings them out of sync. As a consequence a subsequent xfd_update_state() might fail to update the MSR which in turn can result in XRSTOR raising a #NM in kernel space, which crashes the kernel. To fix this, introduce xfd_set_state() to write xfd_state together with MSR_IA32_XFD, and use it in all places that set MSR_IA32_XFD. Fixes: 672365477ae8 ("x86/fpu: Update XFD state where required") Signed-off-by: Adamos Ttofari Signed-off-by: Chang S. Bae Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240322230439.456571-1-chang.seok.bae@intel.com Closes: https://lore.kernel.org/lkml/20230511152818.13839-1-attofari@amazon.de Signed-off-by: Sasha Levin --- arch/x86/kernel/fpu/xstate.c | 5 +++-- arch/x86/kernel/fpu/xstate.h | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ebe698f8af73b..2aa849705bb68 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -177,10 +177,11 @@ void fpu__init_cpu_xstate(void) * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures - * that any stale state is wiped out from XFD. + * that any stale state is wiped out from XFD. Reset the per CPU + * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) - wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3518fb26d06b0..19ca623ffa2ac 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #endif #ifdef CONFIG_X86_64 +static inline void xfd_set_state(u64 xfd) +{ + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); +} + static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; - if (__this_cpu_read(xfd_state) != xfd) { - wrmsrl(MSR_IA32_XFD, xfd); - __this_cpu_write(xfd_state, xfd); - } + if (__this_cpu_read(xfd_state) != xfd) + xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else +static inline void xfd_set_state(u64 xfd) { } + static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { -- GitLab From b9d103aca85f082a343b222493f3cab1219aaaf4 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Sat, 23 Mar 2024 06:33:33 +0000 Subject: [PATCH 1311/1418] efi: fix panic in kdump kernel [ Upstream commit 62b71cd73d41ddac6b1760402bbe8c4932e23531 ] Check if get_next_variable() is actually valid pointer before calling it. In kdump kernel this method is set to NULL that causes panic during the kexec-ed kernel boot. Tested with QEMU and OVMF firmware. Fixes: bad267f9e18f ("efi: verify that variable services are supported") Signed-off-by: Oleksandr Tymoshenko Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9077353d1c98d..28d4defc5d0cd 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -194,6 +194,8 @@ static bool generic_ops_supported(void) name_size = sizeof(name); + if (!efi.get_next_variable) + return false; status = efi.get_next_variable(&name_size, &name, &guid); if (status == EFI_UNSUPPORTED) return false; -- GitLab From 44b6fb6cdedb2c391a2da355521d4610b2645fcc Mon Sep 17 00:00:00 2001 From: Zoltan HERPAI Date: Wed, 20 Mar 2024 09:36:02 +0100 Subject: [PATCH 1312/1418] pwm: img: fix pwm clock lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9eb05877dbee03064d3d3483cd6702f610d5a358 ] 22e8e19 has introduced a regression in the imgchip->pwm_clk lookup, whereas the clock name has also been renamed to "imgchip". This causes the driver failing to load: [ 0.546905] img-pwm 18101300.pwm: failed to get imgchip clock [ 0.553418] img-pwm: probe of 18101300.pwm failed with error -2 Fix this lookup by reverting the clock name back to "pwm". Signed-off-by: Zoltan HERPAI Link: https://lore.kernel.org/r/20240320083602.81592-1-wigyori@uid0.hu Fixes: 22e8e19a46f7 ("pwm: img: Rename variable pointing to driver private data") Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/pwm/pwm-img.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 0fccf061ab958..8ce6c453adf07 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -289,9 +289,9 @@ static int img_pwm_probe(struct platform_device *pdev) return PTR_ERR(imgchip->sys_clk); } - imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip"); + imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm"); if (IS_ERR(imgchip->pwm_clk)) { - dev_err(&pdev->dev, "failed to get imgchip clock\n"); + dev_err(&pdev->dev, "failed to get pwm clock\n"); return PTR_ERR(imgchip->pwm_clk); } -- GitLab From 4f2debd9c6801065196960d2967de4668ed025a8 Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Wed, 21 Feb 2024 12:53:04 +0100 Subject: [PATCH 1313/1418] tty: serial: imx: Fix broken RS485 commit 672448ccf9b6a676f96f9352cbf91f4d35f4084a upstream. When about to transmit the function imx_uart_start_tx is called and in some RS485 configurations this function will call imx_uart_stop_rx. The problem is that imx_uart_stop_rx will enable loopback in order to release the RS485 bus, but when loopback is enabled transmitted data will just be looped to RX. This patch fixes the above problem by not enabling loopback when about to transmit. This driver now works well when used for RS485 half duplex master configurations. Fixes: 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high") Cc: stable Signed-off-by: Rickard x Andersson Tested-by: Christoph Niedermaier Link: https://lore.kernel.org/r/20240221115304.509811-1-rickaran@axis.com Signed-off-by: Christoph Niedermaier Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index f8962a3d44216..573bf7e9b7978 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -496,8 +496,7 @@ static void imx_uart_stop_tx(struct uart_port *port) } } -/* called with port.lock taken and irqs off */ -static void imx_uart_stop_rx(struct uart_port *port) +static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback) { struct imx_port *sport = (struct imx_port *)port; u32 ucr1, ucr2, ucr4, uts; @@ -519,7 +518,7 @@ static void imx_uart_stop_rx(struct uart_port *port) /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ if (port->rs485.flags & SER_RS485_ENABLED && port->rs485.flags & SER_RS485_RTS_ON_SEND && - sport->have_rtscts && !sport->have_rtsgpio) { + sport->have_rtscts && !sport->have_rtsgpio && loopback) { uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); uts |= UTS_LOOP; imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); @@ -531,6 +530,16 @@ static void imx_uart_stop_rx(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } +/* called with port.lock taken and irqs off */ +static void imx_uart_stop_rx(struct uart_port *port) +{ + /* + * Stop RX and enable loopback in order to make sure RS485 bus + * is not blocked. Se comment in imx_uart_probe(). + */ + imx_uart_stop_rx_with_loopback_ctrl(port, true); +} + /* called with port.lock taken and irqs off */ static void imx_uart_enable_ms(struct uart_port *port) { @@ -719,8 +728,13 @@ static void imx_uart_start_tx(struct uart_port *port) imx_uart_rts_inactive(sport, &ucr2); imx_uart_writel(sport, ucr2, UCR2); + /* + * Since we are about to transmit we can not stop RX + * with loopback enabled because that will make our + * transmitted data being just looped to RX. + */ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - imx_uart_stop_rx(port); + imx_uart_stop_rx_with_loopback_ctrl(port, false); sport->tx_state = WAIT_AFTER_RTS; -- GitLab From 242006996d15f5ca62e22f8c7de077d9c4a8f367 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 29 Feb 2024 13:08:09 -0500 Subject: [PATCH 1314/1418] block: Fix page refcounts for unaligned buffers in __bio_release_pages() commit 38b43539d64b2fa020b3b9a752a986769f87f7a6 upstream. Fix an incorrect number of pages being released for buffers that do not start at the beginning of a page. Fixes: 1b151e2435fc ("block: Remove special-casing of compound pages") Cc: stable@vger.kernel.org Signed-off-by: Tony Battersby Tested-by: Greg Edwards Link: https://lore.kernel.org/r/86e592a9-98d4-4cff-a646-0c0084328356@cybernetics.com Signed-off-by: Jens Axboe [ Tony: backport to v6.1 by replacing bio_release_page() loop with folio_put_refs() as commits fd363244e883 and e4cc64657bec are not present. ] Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/block/bio.c b/block/bio.c index 74c2818c7ec99..3318e0022fdfd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1112,19 +1112,16 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) struct folio_iter fi; bio_for_each_folio_all(fi, bio) { - struct page *page; - size_t done = 0; + size_t nr_pages; if (mark_dirty) { folio_lock(fi.folio); folio_mark_dirty(fi.folio); folio_unlock(fi.folio); } - page = folio_page(fi.folio, fi.offset / PAGE_SIZE); - do { - folio_put(fi.folio); - done += PAGE_SIZE; - } while (done < fi.length); + nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE - + fi.offset / PAGE_SIZE + 1; + folio_put_refs(fi.folio, nr_pages); } } EXPORT_SYMBOL_GPL(__bio_release_pages); -- GitLab From 052e4c8987ee9bef8994cf9c9166e2c6759e0752 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 13 Aug 2023 23:23:25 +0800 Subject: [PATCH 1315/1418] blk-mq: release scheduler resource when request completes commit e5c0ca13659e9d18f53368d651ed7e6e433ec1cf upstream. Chuck reported [1] an IO hang problem on NFS exports that reside on SATA devices and bisected to commit 615939a2ae73 ("blk-mq: defer to the normal submission path for post-flush requests"). We analysed the IO hang problem, found there are two postflush requests waiting for each other. The first postflush request completed the REQ_FSEQ_DATA sequence, so go to the REQ_FSEQ_POSTFLUSH sequence and added in the flush pending list, but failed to blk_kick_flush() because of the second postflush request which is inflight waiting in scheduler queue. The second postflush waiting in scheduler queue can't be dispatched because the first postflush hasn't released scheduler resource even though it has completed by itself. Fix it by releasing scheduler resource when the first postflush request completed, so the second postflush can be dispatched and completed, then make blk_kick_flush() succeed. While at it, remove the check for e->ops.finish_request, as all schedulers set that. Reaffirm this requirement by adding a WARN_ON_ONCE() at scheduler registration time, just like we do for insert_requests and dispatch_request. [1] https://lore.kernel.org/all/7A57C7AE-A51A-4254-888B-FE15CA21F9E9@oracle.com/ Link: https://lore.kernel.org/linux-block/20230819031206.2744005-1-chengming.zhou@linux.dev/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202308172100.8ce4b853-oliver.sang@intel.com Fixes: 615939a2ae73 ("blk-mq: defer to the normal submission path for post-flush requests") Reported-by: Chuck Lever Signed-off-by: Chengming Zhou Tested-by: Chuck Lever Link: https://lore.kernel.org/r/20230813152325.3017343-1-chengming.zhou@linux.dev [axboe: folded in incremental fix and added tags] Signed-off-by: Jens Axboe [bvanassche: changed RQF_USE_SCHED into RQF_ELVPRIV; restored the finish_request pointer check before calling finish_request and removed the new warning from the elevator code. This patch fixes an I/O hang when submitting a REQ_FUA request to a request queue for a zoned block device for which FUA has been disabled (QUEUE_FLAG_FUA is not set).] Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 7ed6b9469f979..07610505c1776 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -675,6 +675,22 @@ out_queue_exit: } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); +static void blk_mq_finish_request(struct request *rq) +{ + struct request_queue *q = rq->q; + + if ((rq->rq_flags & RQF_ELVPRIV) && + q->elevator->type->ops.finish_request) { + q->elevator->type->ops.finish_request(rq); + /* + * For postflush request that may need to be + * completed twice, we should clear this flag + * to avoid double finish_request() on the rq. + */ + rq->rq_flags &= ~RQF_ELVPRIV; + } +} + static void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; @@ -701,9 +717,7 @@ void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - if ((rq->rq_flags & RQF_ELVPRIV) && - q->elevator->type->ops.finish_request) - q->elevator->type->ops.finish_request(rq); + blk_mq_finish_request(rq); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); @@ -1025,6 +1039,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) if (blk_mq_need_time_stamp(rq)) __blk_mq_end_request_acct(rq, ktime_get_ns()); + blk_mq_finish_request(rq); + if (rq->end_io) { rq_qos_done(rq->q, rq); if (rq->end_io(rq, error) == RQ_END_IO_FREE) @@ -1079,6 +1095,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) if (iob->need_ts) __blk_mq_end_request_acct(rq, now); + blk_mq_finish_request(rq); + rq_qos_done(rq->q, rq); /* -- GitLab From 6d87a9be22151997984a96e32b769b01de158c6f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 18:11:22 +0100 Subject: [PATCH 1316/1418] selftests: mptcp: diag: return KSFT_FAIL not test_cnt commit 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 upstream. The test counter 'test_cnt' should not be returned in diag.sh, e.g. what if only the 4th test fail? Will do 'exit 4' which is 'exit ${KSFT_SKIP}', the whole test will be marked as skipped instead of 'failed'! So we should do ret=${KSFT_FAIL} instead. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Cc: stable@vger.kernel.org Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 400cf1ce96e31..3df4a8103c76f 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() echo "[ skip ] Feature probably not supported" else echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} fi else echo "[ ok ]" @@ -100,10 +100,10 @@ wait_msk_nr() printf "%-50s" "$msg" if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" - ret=$test_cnt + ret=${KSFT_FAIL} elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" fi -- GitLab From 139dfcc4d723ab13469881200c7d80f49d776060 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:50 -0600 Subject: [PATCH 1317/1418] vfio/pci: Disable auto-enable of exclusive INTx IRQ [ Upstream commit fe9a7082684eb059b925c535682e68c34d487d43 ] Currently for devices requiring masking at the irqchip for INTx, ie. devices without DisINTx support, the IRQ is enabled in request_irq() and subsequently disabled as necessary to align with the masked status flag. This presents a window where the interrupt could fire between these events, resulting in the IRQ incrementing the disable depth twice. This would be unrecoverable for a user since the masked flag prevents nested enables through vfio. Instead, invert the logic using IRQF_NO_AUTOEN such that exclusive INTx is never auto-enabled, then unmask as required. Cc: Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_intrs.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 8c8b04d858454..4f0699215f12b 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -251,8 +251,15 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) vdev->ctx[0].trigger = trigger; + /* + * Devices without DisINTx support require an exclusive interrupt, + * IRQ masking is performed at the IRQ chip. The masked status is + * protected by vdev->irqlock. Setup the IRQ without auto-enable and + * unmask as necessary below under lock. DisINTx is unmodified by + * the IRQ configuration and may therefore use auto-enable. + */ if (!vdev->pci_2_3) - irqflags = 0; + irqflags = IRQF_NO_AUTOEN; ret = request_irq(pdev->irq, vfio_intx_handler, irqflags, vdev->ctx[0].name, vdev); @@ -263,13 +270,9 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) return ret; } - /* - * INTx disable will stick across the new irq setup, - * disable_irq won't. - */ spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && vdev->ctx[0].masked) - disable_irq_nosync(pdev->irq); + if (!vdev->pci_2_3 && !vdev->ctx[0].masked) + enable_irq(pdev->irq); spin_unlock_irqrestore(&vdev->irqlock, flags); return 0; -- GitLab From 33dc33f560017ffdf7a3db03b963f407082aab16 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:51 -0600 Subject: [PATCH 1318/1418] vfio: Introduce interface to flush virqfd inject workqueue [ Upstream commit b620ecbd17a03cacd06f014a5d3f3a11285ce053 ] In order to synchronize changes that can affect the thread callback, introduce an interface to force a flush of the inject workqueue. The irqfd pointer is only valid under spinlock, but the workqueue cannot be flushed under spinlock. Therefore the flush work for the irqfd is queued under spinlock. The vfio_irqfd_cleanup_wq workqueue is re-used for queuing this work such that flushing the workqueue is also ordered relative to shutdown. Reviewed-by: Kevin Tian Reviewed-by: Reinette Chatre Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-4-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/virqfd.c | 21 +++++++++++++++++++++ include/linux/vfio.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index a928c68df4763..e06b32ddedced 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -104,6 +104,13 @@ static void virqfd_inject(struct work_struct *work) virqfd->thread(virqfd->opaque, virqfd->data); } +static void virqfd_flush_inject(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); + + flush_work(&virqfd->inject); +} + int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), @@ -127,6 +134,7 @@ int vfio_virqfd_enable(void *opaque, INIT_WORK(&virqfd->shutdown, virqfd_shutdown); INIT_WORK(&virqfd->inject, virqfd_inject); + INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); irqfd = fdget(fd); if (!irqfd.file) { @@ -217,6 +225,19 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) +{ + unsigned long flags; + + spin_lock_irqsave(&virqfd_lock, flags); + if (*pvirqfd && (*pvirqfd)->thread) + queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); + spin_unlock_irqrestore(&virqfd_lock, flags); + + flush_workqueue(vfio_irqfd_cleanup_wq); +} +EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); + module_init(vfio_virqfd_init); module_exit(vfio_virqfd_exit); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index fdd393f70b198..5e7bf143cb223 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -268,6 +268,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -275,5 +276,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ -- GitLab From 7d29d4c72c1e196cce6969c98072a272d1a703b3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:52 -0600 Subject: [PATCH 1319/1418] vfio/pci: Create persistent INTx handler [ Upstream commit 18c198c96a815c962adc2b9b77909eec0be7df4d ] A vulnerability exists where the eventfd for INTx signaling can be deconfigured, which unregisters the IRQ handler but still allows eventfds to be signaled with a NULL context through the SET_IRQS ioctl or through unmask irqfd if the device interrupt is pending. Ideally this could be solved with some additional locking; the igate mutex serializes the ioctl and config space accesses, and the interrupt handler is unregistered relative to the trigger, but the irqfd path runs asynchronous to those. The igate mutex cannot be acquired from the atomic context of the eventfd wake function. Disabling the irqfd relative to the eventfd registration is potentially incompatible with existing userspace. As a result, the solution implemented here moves configuration of the INTx interrupt handler to track the lifetime of the INTx context object and irq_type configuration, rather than registration of a particular trigger eventfd. Synchronization is added between the ioctl path and eventfd_signal() wrapper such that the eventfd trigger can be dynamically updated relative to in-flight interrupts or irqfd callbacks. Cc: Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reported-by: Reinette Chatre Reviewed-by: Kevin Tian Reviewed-by: Reinette Chatre Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_intrs.c | 149 ++++++++++++++++-------------- 1 file changed, 82 insertions(+), 67 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 4f0699215f12b..03246a59b5536 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -55,8 +55,13 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) { struct vfio_pci_core_device *vdev = opaque; - if (likely(is_intx(vdev) && !vdev->virq_disabled)) - eventfd_signal(vdev->ctx[0].trigger, 1); + if (likely(is_intx(vdev) && !vdev->virq_disabled)) { + struct eventfd_ctx *trigger; + + trigger = READ_ONCE(vdev->ctx[0].trigger); + if (likely(trigger)) + eventfd_signal(trigger, 1); + } } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */ @@ -191,98 +196,104 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) return ret; } -static int vfio_intx_enable(struct vfio_pci_core_device *vdev) +static int vfio_intx_enable(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { + struct pci_dev *pdev = vdev->pdev; + unsigned long irqflags; + char *name; + int ret; + if (!is_irq_none(vdev)) return -EINVAL; - if (!vdev->pdev->irq) + if (!pdev->irq) return -ENODEV; + name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); + if (!name) + return -ENOMEM; + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; vdev->num_ctx = 1; + vdev->ctx[0].name = name; + vdev->ctx[0].trigger = trigger; + /* - * If the virtual interrupt is masked, restore it. Devices - * supporting DisINTx can be masked at the hardware level - * here, non-PCI-2.3 devices will have to wait until the - * interrupt is enabled. + * Fill the initial masked state based on virq_disabled. After + * enable, changing the DisINTx bit in vconfig directly changes INTx + * masking. igate prevents races during setup, once running masked + * is protected via irqlock. + * + * Devices supporting DisINTx also reflect the current mask state in + * the physical DisINTx bit, which is not affected during IRQ setup. + * + * Devices without DisINTx support require an exclusive interrupt. + * IRQ masking is performed at the IRQ chip. Again, igate protects + * against races during setup and IRQ handlers and irqfds are not + * yet active, therefore masked is stable and can be used to + * conditionally auto-enable the IRQ. + * + * irq_type must be stable while the IRQ handler is registered, + * therefore it must be set before request_irq(). */ vdev->ctx[0].masked = vdev->virq_disabled; - if (vdev->pci_2_3) - pci_intx(vdev->pdev, !vdev->ctx[0].masked); + if (vdev->pci_2_3) { + pci_intx(pdev, !vdev->ctx[0].masked); + irqflags = IRQF_SHARED; + } else { + irqflags = vdev->ctx[0].masked ? IRQF_NO_AUTOEN : 0; + } vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; + ret = request_irq(pdev->irq, vfio_intx_handler, + irqflags, vdev->ctx[0].name, vdev); + if (ret) { + vdev->irq_type = VFIO_PCI_NUM_IRQS; + kfree(name); + vdev->num_ctx = 0; + kfree(vdev->ctx); + return ret; + } + return 0; } -static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) +static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { struct pci_dev *pdev = vdev->pdev; - unsigned long irqflags = IRQF_SHARED; - struct eventfd_ctx *trigger; - unsigned long flags; - int ret; + struct eventfd_ctx *old; - if (vdev->ctx[0].trigger) { - free_irq(pdev->irq, vdev); - kfree(vdev->ctx[0].name); - eventfd_ctx_put(vdev->ctx[0].trigger); - vdev->ctx[0].trigger = NULL; - } - - if (fd < 0) /* Disable only */ - return 0; + old = vdev->ctx[0].trigger; - vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", - pci_name(pdev)); - if (!vdev->ctx[0].name) - return -ENOMEM; + WRITE_ONCE(vdev->ctx[0].trigger, trigger); - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(vdev->ctx[0].name); - return PTR_ERR(trigger); - } - - vdev->ctx[0].trigger = trigger; - - /* - * Devices without DisINTx support require an exclusive interrupt, - * IRQ masking is performed at the IRQ chip. The masked status is - * protected by vdev->irqlock. Setup the IRQ without auto-enable and - * unmask as necessary below under lock. DisINTx is unmodified by - * the IRQ configuration and may therefore use auto-enable. - */ - if (!vdev->pci_2_3) - irqflags = IRQF_NO_AUTOEN; - - ret = request_irq(pdev->irq, vfio_intx_handler, - irqflags, vdev->ctx[0].name, vdev); - if (ret) { - vdev->ctx[0].trigger = NULL; - kfree(vdev->ctx[0].name); - eventfd_ctx_put(trigger); - return ret; + /* Releasing an old ctx requires synchronizing in-flight users */ + if (old) { + synchronize_irq(pdev->irq); + vfio_virqfd_flush_thread(&vdev->ctx[0].unmask); + eventfd_ctx_put(old); } - spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && !vdev->ctx[0].masked) - enable_irq(pdev->irq); - spin_unlock_irqrestore(&vdev->irqlock, flags); - return 0; } static void vfio_intx_disable(struct vfio_pci_core_device *vdev) { + struct pci_dev *pdev = vdev->pdev; + vfio_virqfd_disable(&vdev->ctx[0].unmask); vfio_virqfd_disable(&vdev->ctx[0].mask); - vfio_intx_set_signal(vdev, -1); + free_irq(pdev->irq, vdev); + if (vdev->ctx[0].trigger) + eventfd_ctx_put(vdev->ctx[0].trigger); + kfree(vdev->ctx[0].name); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -534,19 +545,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + struct eventfd_ctx *trigger = NULL; int32_t fd = *(int32_t *)data; int ret; - if (is_intx(vdev)) - return vfio_intx_set_signal(vdev, fd); + if (fd >= 0) { + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) + return PTR_ERR(trigger); + } - ret = vfio_intx_enable(vdev); - if (ret) - return ret; + if (is_intx(vdev)) + ret = vfio_intx_set_signal(vdev, trigger); + else + ret = vfio_intx_enable(vdev, trigger); - ret = vfio_intx_set_signal(vdev, fd); - if (ret) - vfio_intx_disable(vdev); + if (ret && trigger) + eventfd_ctx_put(trigger); return ret; } -- GitLab From 7932db06c82c5b2f42a4d1a849d97dba9ce4a362 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:53 -0600 Subject: [PATCH 1320/1418] vfio/platform: Create persistent IRQ handlers [ Upstream commit 675daf435e9f8e5a5eab140a9864dfad6668b375 ] The vfio-platform SET_IRQS ioctl currently allows loopback triggering of an interrupt before a signaling eventfd has been configured by the user, which thereby allows a NULL pointer dereference. Rather than register the IRQ relative to a valid trigger, register all IRQs in a disabled state in the device open path. This allows mask operations on the IRQ to nest within the overall enable state governed by a valid eventfd signal. This decouples @masked, protected by the @locked spinlock from @trigger, protected via the @igate mutex. In doing so, it's guaranteed that changes to @trigger cannot race the IRQ handlers because the IRQ handler is synchronously disabled before modifying the trigger, and loopback triggering of the IRQ via ioctl is safe due to serialization with trigger changes via igate. For compatibility, request_irq() failures are maintained to be local to the SET_IRQS ioctl rather than a fatal error in the open device path. This allows, for example, a userspace driver with polling mode support to continue to work regardless of moving the request_irq() call site. This necessarily blocks all SET_IRQS access to the failed index. Cc: Eric Auger Cc: Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-7-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/platform/vfio_platform_irq.c | 101 +++++++++++++++------- 1 file changed, 68 insertions(+), 33 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index f2893f2fcaabd..7f4341a8d7185 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, return 0; } +/* + * The trigger eventfd is guaranteed valid in the interrupt path + * and protected by the igate mutex when triggered via ioctl. + */ +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) +{ + if (likely(irq_ctx->trigger)) + eventfd_signal(irq_ctx->trigger, 1); +} + static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) spin_unlock_irqrestore(&irq_ctx->lock, flags); if (ret == IRQ_HANDLED) - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return ret; } @@ -164,22 +174,19 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return IRQ_HANDLED; } static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, - int fd, irq_handler_t handler) + int fd) { struct vfio_platform_irq *irq = &vdev->irqs[index]; struct eventfd_ctx *trigger; - int ret; if (irq->trigger) { - irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); - free_irq(irq->hwirq, irq); - kfree(irq->name); + disable_irq(irq->hwirq); eventfd_ctx_put(irq->trigger); irq->trigger = NULL; } @@ -187,30 +194,20 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, if (fd < 0) /* Disable only */ return 0; - irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", - irq->hwirq, vdev->name); - if (!irq->name) - return -ENOMEM; - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(irq->name); + if (IS_ERR(trigger)) return PTR_ERR(trigger); - } irq->trigger = trigger; - irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); - ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); - if (ret) { - kfree(irq->name); - eventfd_ctx_put(trigger); - irq->trigger = NULL; - return ret; - } - - if (!irq->masked) - enable_irq(irq->hwirq); + /* + * irq->masked effectively provides nested disables within the overall + * enable relative to trigger. Specifically request_irq() is called + * with NO_AUTOEN, therefore the IRQ is initially disabled. The user + * may only further disable the IRQ with a MASK operations because + * irq->masked is initially false. + */ + enable_irq(irq->hwirq); return 0; } @@ -229,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, handler = vfio_irq_handler; if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) - return vfio_set_trigger(vdev, index, -1, handler); + return vfio_set_trigger(vdev, index, -1); if (start != 0 || count != 1) return -EINVAL; @@ -237,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; - return vfio_set_trigger(vdev, index, fd, handler); + return vfio_set_trigger(vdev, index, fd); } if (flags & VFIO_IRQ_SET_DATA_NONE) { @@ -261,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, unsigned start, unsigned count, uint32_t flags, void *data) = NULL; + /* + * For compatibility, errors from request_irq() are local to the + * SET_IRQS path and reflected in the name pointer. This allows, + * for example, polling mode fallback for an exclusive IRQ failure. + */ + if (IS_ERR(vdev->irqs[index].name)) + return PTR_ERR(vdev->irqs[index].name); + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_MASK: func = vfio_platform_set_irq_mask; @@ -281,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, int vfio_platform_irq_init(struct vfio_platform_device *vdev) { - int cnt = 0, i; + int cnt = 0, i, ret = 0; while (vdev->get_irq(vdev, cnt) >= 0) cnt++; @@ -292,29 +297,54 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) for (i = 0; i < cnt; i++) { int hwirq = vdev->get_irq(vdev, i); + irq_handler_t handler = vfio_irq_handler; - if (hwirq < 0) + if (hwirq < 0) { + ret = -EINVAL; goto err; + } spin_lock_init(&vdev->irqs[i].lock); vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; - if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED; + handler = vfio_automasked_irq_handler; + } vdev->irqs[i].count = 1; vdev->irqs[i].hwirq = hwirq; vdev->irqs[i].masked = false; + vdev->irqs[i].name = kasprintf(GFP_KERNEL, + "vfio-irq[%d](%s)", hwirq, + vdev->name); + if (!vdev->irqs[i].name) { + ret = -ENOMEM; + goto err; + } + + ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, + vdev->irqs[i].name, &vdev->irqs[i]); + if (ret) { + kfree(vdev->irqs[i].name); + vdev->irqs[i].name = ERR_PTR(ret); + } } vdev->num_irqs = cnt; return 0; err: + for (--i; i >= 0; i--) { + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + kfree(vdev->irqs[i].name); + } + } kfree(vdev->irqs); - return -EINVAL; + return ret; } void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) @@ -324,7 +354,12 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) for (i = 0; i < vdev->num_irqs; i++) { vfio_virqfd_disable(&vdev->irqs[i].mask); vfio_virqfd_disable(&vdev->irqs[i].unmask); - vfio_set_trigger(vdev, i, -1, NULL); + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + if (vdev->irqs[i].trigger) + eventfd_ctx_put(vdev->irqs[i].trigger); + kfree(vdev->irqs[i].name); + } } vdev->num_irqs = 0; -- GitLab From 083e750c9f5f4c3bf61161330fb84d7c8e8bb417 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:54 -0600 Subject: [PATCH 1321/1418] vfio/fsl-mc: Block calling interrupt handler without trigger [ Upstream commit 7447d911af699a15f8d050dfcb7c680a86f87012 ] The eventfd_ctx trigger pointer of the vfio_fsl_mc_irq object is initially NULL and may become NULL if the user sets the trigger eventfd to -1. The interrupt handler itself is guaranteed that trigger is always valid between request_irq() and free_irq(), but the loopback testing mechanisms to invoke the handler function need to test the trigger. The triggering and setting ioctl paths both make use of igate and are therefore mutually exclusive. The vfio-fsl-mc driver does not make use of irqfds, nor does it support any sort of masking operations, therefore unlike vfio-pci and vfio-platform, the flow can remain essentially unchanged. Cc: Diana Craciun Cc: Fixes: cc0ee20bd969 ("vfio/fsl-mc: trigger an interrupt via eventfd") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-8-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 7b428eac3d3e5..b125b6edf634e 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -142,13 +142,14 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, irq = &vdev->mc_irqs[index]; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_fsl_mc_irq_handler(hwirq, irq); + if (irq->trigger) + eventfd_signal(irq->trigger, 1); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { u8 trigger = *(u8 *)data; - if (trigger) - vfio_fsl_mc_irq_handler(hwirq, irq); + if (trigger && irq->trigger) + eventfd_signal(irq->trigger, 1); } return 0; -- GitLab From 452a382970640b27c8f0b97347eaf4c2902cf572 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 18 Mar 2023 12:56:33 +0100 Subject: [PATCH 1322/1418] x86/coco: Export cc_vendor commit 3d91c537296794d5d0773f61abbe7b63f2f132d8 upstream. It will be used in different checks in future changes. Export it directly and provide accessor functions and stubs so this can be used in general code when CONFIG_ARCH_HAS_CC_PLATFORM is not set. No functional changes. [ tglx: Add accessor functions ] Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230318115634.9392-2-bp@alien8.de Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 13 ++++--------- arch/x86/include/asm/coco.h | 23 ++++++++++++++++++++--- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 49b44f8814846..684f0a910475c 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,7 +13,7 @@ #include #include -static enum cc_vendor vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init; static u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) @@ -83,7 +83,7 @@ static bool hyperv_cc_platform_has(enum cc_attr attr) bool cc_platform_has(enum cc_attr attr) { - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return amd_cc_platform_has(attr); case CC_VENDOR_INTEL: @@ -105,7 +105,7 @@ u64 cc_mkenc(u64 val) * - for AMD, bit *set* means the page is encrypted * - for Intel *clear* means encrypted. */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return val | cc_mask; case CC_VENDOR_INTEL: @@ -118,7 +118,7 @@ u64 cc_mkenc(u64 val) u64 cc_mkdec(u64 val) { /* See comment in cc_mkenc() */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return val & ~cc_mask; case CC_VENDOR_INTEL: @@ -129,11 +129,6 @@ u64 cc_mkdec(u64 val) } EXPORT_SYMBOL_GPL(cc_mkdec); -__init void cc_set_vendor(enum cc_vendor v) -{ - vendor = v; -} - __init void cc_set_mask(u64 mask) { cc_mask = mask; diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a60d34f..91b9448ffe768 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -11,13 +11,30 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -void cc_set_vendor(enum cc_vendor v); -void cc_set_mask(u64 mask); - #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern enum cc_vendor cc_vendor; + +static inline enum cc_vendor cc_get_vendor(void) +{ + return cc_vendor; +} + +static inline void cc_set_vendor(enum cc_vendor vendor) +{ + cc_vendor = vendor; +} + +void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else +static inline enum cc_vendor cc_get_vendor(void) +{ + return CC_VENDOR_NONE; +} + +static inline void cc_set_vendor(enum cc_vendor vendor) { } + static inline u64 cc_mkenc(u64 val) { return val; -- GitLab From 851ddc3587379975c6d5da7ef0ac39b70bd063e5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 8 May 2023 12:44:28 +0200 Subject: [PATCH 1323/1418] x86/coco: Get rid of accessor functions commit da86eb9611840772a459693832e54c63cbcc040a upstream. cc_vendor is __ro_after_init and thus can be used directly. No functional changes. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230508121957.32341-1-bp@alien8.de Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 2 +- arch/x86/coco/tdx/tdx.c | 2 +- arch/x86/include/asm/coco.h | 19 +------------------ arch/x86/kernel/cpu/mshyperv.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 2 +- 5 files changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 684f0a910475c..1e73254336a62 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,7 +13,7 @@ #include #include -enum cc_vendor cc_vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; static u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index d0565a9e7d8c9..4692450aeb4d3 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -793,7 +793,7 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); - cc_set_vendor(CC_VENDOR_INTEL); + cc_vendor = CC_VENDOR_INTEL; tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 91b9448ffe768..75a0d7b1a906a 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -11,30 +11,13 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -#ifdef CONFIG_ARCH_HAS_CC_PLATFORM extern enum cc_vendor cc_vendor; -static inline enum cc_vendor cc_get_vendor(void) -{ - return cc_vendor; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) -{ - cc_vendor = vendor; -} - +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else -static inline enum cc_vendor cc_get_vendor(void) -{ - return CC_VENDOR_NONE; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) { } - static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 34d9e899e471e..9b039e9635e40 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -344,7 +344,7 @@ static void __init ms_hyperv_init_platform(void) /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); + cc_vendor = CC_VENDOR_HYPERV; } } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index a11a6ebbf5ecf..4daeefa011ed3 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -608,7 +608,7 @@ void __init sme_enable(struct boot_params *bp) out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; - cc_set_vendor(CC_VENDOR_AMD); + cc_vendor = CC_VENDOR_AMD; cc_set_mask(sme_me_mask); } } -- GitLab From 77e3de4113958c06a1f6696adefc9baf0b6a2659 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 2 Feb 2024 17:29:32 +0100 Subject: [PATCH 1324/1418] x86/Kconfig: Remove CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT commit 29956748339aa8757a7e2f927a8679dd08f24bb6 upstream. It was meant well at the time but nothing's using it so get rid of it. Signed-off-by: Borislav Petkov (AMD) Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240202163510.GDZb0Zvj8qOndvFOiZ@fat_crate.local Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 4 +--- Documentation/x86/amd-memory-encryption.rst | 16 ++++++++-------- arch/x86/Kconfig | 13 ------------- arch/x86/mm/mem_encrypt_identity.c | 11 +---------- 4 files changed, 10 insertions(+), 34 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f7dfdd3d021ea..88dffaf8f0a99 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3206,9 +3206,7 @@ mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control Valid arguments: on, off - Default (depends on kernel configuration option): - on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) - off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n) + Default: off mem_encrypt=on: Activate SME mem_encrypt=off: Do not activate SME diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst index 934310ce72582..bace87cc9ca2c 100644 --- a/Documentation/x86/amd-memory-encryption.rst +++ b/Documentation/x86/amd-memory-encryption.rst @@ -87,14 +87,14 @@ The state of SME in the Linux kernel can be documented as follows: kernel is non-zero). SME can also be enabled and activated in the BIOS. If SME is enabled and -activated in the BIOS, then all memory accesses will be encrypted and it will -not be necessary to activate the Linux memory encryption support. If the BIOS -merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate -memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or -by supplying mem_encrypt=on on the kernel command line. However, if BIOS does -not enable SME, then Linux will not be able to activate memory encryption, even -if configured to do so by default or the mem_encrypt=on command line parameter -is specified. +activated in the BIOS, then all memory accesses will be encrypted and it +will not be necessary to activate the Linux memory encryption support. + +If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), +then memory encryption can be enabled by supplying mem_encrypt=on on the +kernel command line. However, if BIOS does not enable SME, then Linux +will not be able to activate memory encryption, even if configured to do +so by default or the mem_encrypt=on command line parameter is specified. Secure Nested Paging (SNP) ========================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5caa023e98397..bea53385d31e3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1553,19 +1553,6 @@ config AMD_MEM_ENCRYPT This requires an AMD processor that supports Secure Memory Encryption (SME). -config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT - bool "Activate AMD Secure Memory Encryption (SME) by default" - depends on AMD_MEM_ENCRYPT - help - Say yes to have system memory encrypted by default if running on - an AMD processor that supports Secure Memory Encryption (SME). - - If set to Y, then the encryption of system memory can be - deactivated with the mem_encrypt=off command line option. - - If set to N, then the encryption of system memory can be - activated with the mem_encrypt=on command line option. - # Common NUMA Features config NUMA bool "NUMA Memory Allocation and Scheduler Support" diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 4daeefa011ed3..7d96904230af0 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; -static char sme_cmdline_off[] __initdata = "off"; static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { @@ -504,7 +503,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) void __init sme_enable(struct boot_params *bp) { - const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; + const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; unsigned long me_mask; @@ -587,12 +586,6 @@ void __init sme_enable(struct boot_params *bp) asm ("lea sme_cmdline_on(%%rip), %0" : "=r" (cmdline_on) : "p" (sme_cmdline_on)); - asm ("lea sme_cmdline_off(%%rip), %0" - : "=r" (cmdline_off) - : "p" (sme_cmdline_off)); - - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) - sme_me_mask = me_mask; cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); @@ -602,8 +595,6 @@ void __init sme_enable(struct boot_params *bp) if (!strncmp(buffer, cmdline_on, sizeof(buffer))) sme_me_mask = me_mask; - else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) - sme_me_mask = 0; out: if (sme_me_mask) { -- GitLab From fe272b61506bb1534922ef07aa165fd3c37a6a90 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 3 Feb 2024 13:53:06 +0100 Subject: [PATCH 1325/1418] x86/sev: Fix position dependent variable references in startup code commit 1c811d403afd73f04bde82b83b24c754011bd0e8 upstream. The early startup code executes from a 1:1 mapping of memory, which differs from the mapping that the code was linked and/or relocated to run at. The latter mapping is not active yet at this point, and so symbol references that rely on it will fault. Given that the core kernel is built without -fPIC, symbol references are typically emitted as absolute, and so any such references occuring in the early startup code will therefore crash the kernel. While an attempt was made to work around this for the early SEV/SME startup code, by forcing RIP-relative addressing for certain global SEV/SME variables via inline assembly (see snp_cpuid_get_table() for example), RIP-relative addressing must be pervasively enforced for SEV/SME global variables when accessed prior to page table fixups. __startup_64() already handles this issue for select non-SEV/SME global variables using fixup_pointer(), which adjusts the pointer relative to a `physaddr` argument. To avoid having to pass around this `physaddr` argument across all functions needing to apply pointer fixups, introduce a macro RIP_RELATIVE_REF() which generates a RIP-relative reference to a given global variable. It is used where necessary to force RIP-relative accesses to global variables. For backporting purposes, this patch makes no attempt at cleaning up other occurrences of this pattern, involving either inline asm or fixup_pointer(). Those will be addressed later. [ bp: Call it "rip_rel_ref" everywhere like other code shortens "rIP-relative reference" and make the asm wrapper __always_inline. ] Co-developed-by: Kevin Loughlin Signed-off-by: Kevin Loughlin Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/all/20240130220845.1978329-1-kevinloughlin@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 7 +------ arch/x86/include/asm/asm.h | 14 ++++++++++++++ arch/x86/include/asm/coco.h | 8 +++++++- arch/x86/include/asm/mem_encrypt.h | 15 +++++++++------ arch/x86/kernel/sev-shared.c | 12 ++++++------ arch/x86/kernel/sev.c | 4 ++-- arch/x86/mm/mem_encrypt_identity.c | 27 ++++++++++++--------------- 7 files changed, 51 insertions(+), 36 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 1e73254336a62..1d3ad275c3664 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -14,7 +14,7 @@ #include enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; -static u64 cc_mask __ro_after_init; +u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) { @@ -128,8 +128,3 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); - -__init void cc_set_mask(u64 mask) -{ - cc_mask = mask; -} diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index fbcfec4dc4ccd..ca8eed1d496ab 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -113,6 +113,20 @@ #endif +#ifndef __ASSEMBLY__ +#ifndef __pic__ +static __always_inline __pure void *rip_rel_ptr(void *p) +{ + asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p)); + + return p; +} +#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var))) +#else +#define RIP_REL_REF(var) (var) +#endif +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 75a0d7b1a906a..60bb26097da1a 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_COCO_H #define _ASM_X86_COCO_H +#include #include enum cc_vendor { @@ -12,9 +13,14 @@ enum cc_vendor { }; extern enum cc_vendor cc_vendor; +extern u64 cc_mask; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM -void cc_set_mask(u64 mask); +static inline void cc_set_mask(u64 mask) +{ + RIP_REL_REF(cc_mask) = mask; +} + u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c91326593e741..41d06822bc8cd 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -15,7 +15,8 @@ #include #include -#include +#include +struct boot_params; #ifdef CONFIG_X86_MEM_ENCRYPT void __init mem_encrypt_init(void); @@ -57,6 +58,11 @@ void __init mem_encrypt_free_decrypted_mem(void); void __init sev_es_init_vc_handling(void); +static inline u64 sme_get_me_mask(void) +{ + return RIP_REL_REF(sme_me_mask); +} + #define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -88,6 +94,8 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool en static inline void mem_encrypt_free_decrypted_mem(void) { } +static inline u64 sme_get_me_mask(void) { return 0; } + #define __bss_decrypted #endif /* CONFIG_AMD_MEM_ENCRYPT */ @@ -105,11 +113,6 @@ void add_encrypt_protection_map(void); extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -static inline u64 sme_get_me_mask(void) -{ - return sme_me_mask; -} - #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 71d8698702ce3..271e70d5748ef 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -553,9 +553,9 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; /* Skip post-processing for out-of-range zero leafs. */ - if (!(leaf->fn <= cpuid_std_range_max || - (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || - (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) + if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) || + (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) || + (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max)))) return 0; } @@ -1060,10 +1060,10 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; if (fn->eax_in == 0x0) - cpuid_std_range_max = fn->eax; + RIP_REL_REF(cpuid_std_range_max) = fn->eax; else if (fn->eax_in == 0x40000000) - cpuid_hyp_range_max = fn->eax; + RIP_REL_REF(cpuid_hyp_range_max) = fn->eax; else if (fn->eax_in == 0x80000000) - cpuid_ext_range_max = fn->eax; + RIP_REL_REF(cpuid_ext_range_max) = fn->eax; } } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c8dfb0fdde7f9..f93ff4794e38f 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -736,7 +736,7 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd * This eliminates worries about jump tables or checking boot_cpu_data * in the cc_platform_has() function. */ - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) return; /* @@ -758,7 +758,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr * This eliminates worries about jump tables or checking boot_cpu_data * in the cc_platform_has() function. */ - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) return; /* Invalidate the memory pages before they are marked shared in the RMP table. */ diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 7d96904230af0..06ccbd36e8dcd 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -304,7 +304,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * instrumentation or checking boot_cpu_data in the cc_platform_has() * function. */ - if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED) + if (!sme_get_me_mask() || + RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED) return; /* @@ -541,11 +542,11 @@ void __init sme_enable(struct boot_params *bp) me_mask = 1UL << (ebx & 0x3f); /* Check the SEV MSR whether SEV or SME is enabled */ - sev_status = __rdmsr(MSR_AMD64_SEV); - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); + feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */ - if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED)) snp_abort(); /* Check if memory encryption is enabled */ @@ -571,7 +572,6 @@ void __init sme_enable(struct boot_params *bp) return; } else { /* SEV state cannot be controlled by a command line option */ - sme_me_mask = me_mask; goto out; } @@ -590,16 +590,13 @@ void __init sme_enable(struct boot_params *bp) cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); - if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) - goto out; - - if (!strncmp(buffer, cmdline_on, sizeof(buffer))) - sme_me_mask = me_mask; + if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || + strncmp(buffer, cmdline_on, sizeof(buffer))) + return; out: - if (sme_me_mask) { - physical_mask &= ~sme_me_mask; - cc_vendor = CC_VENDOR_AMD; - cc_set_mask(sme_me_mask); - } + RIP_REL_REF(sme_me_mask) = me_mask; + physical_mask &= ~me_mask; + cc_vendor = CC_VENDOR_AMD; + cc_set_mask(me_mask); } -- GitLab From 3372c3ffc34509cfd2f66761a1f568b78daed04f Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 6 Mar 2024 10:52:17 -0500 Subject: [PATCH 1326/1418] mm/migrate: set swap entry values of THP tail pages properly. The tail pages in a THP can have swap entry information stored in their private field. When migrating to a new page, all tail pages of the new page need to update ->private to avoid future data corruption. This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory: work on folio->swap instead of page->private when splitting folio"), subpages of a swapcached THP no longer requires the maintenance. Adding THPs to the swapcache was introduced in commit 38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"), where each subpage of a THP added to the swapcache had its own swapcache entry and required the ->private field to point to the correct swapcache entry. Later, when THP migration functionality was implemented in commit 616b8371539a6 ("mm: thp: enable thp migration in generic path"), it initially did not handle the subpages of swapcached THPs, failing to update their ->private fields or replace the subpage pointers in the swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration for shmem thp") addressed the swapcache update aspect. This patch fixes the update of subpage ->private fields. Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_charante@quicinc.com/ Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path") Signed-off-by: Zi Yan Acked-by: David Hildenbrand Reported-and-tested-by: Charan Teja Kalla Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index c93dd6a31c31a..c5968021fde0a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -423,8 +423,12 @@ int folio_migrate_mapping(struct address_space *mapping, if (folio_test_swapbacked(folio)) { __folio_set_swapbacked(newfolio); if (folio_test_swapcache(folio)) { + int i; + folio_set_swapcache(newfolio); - newfolio->private = folio_get_private(folio); + for (i = 0; i < nr; i++) + set_page_private(folio_page(newfolio, i), + page_private(folio_page(folio, i))); } entries = nr; } else { -- GitLab From bf2ba0b241e84e1321cefb02b8a68a0a078dcae5 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Sun, 17 Mar 2024 15:15:22 -0700 Subject: [PATCH 1327/1418] init: open /initrd.image with O_LARGEFILE commit 4624b346cf67400ef46a31771011fb798dd2f999 upstream. If initrd data is larger than 2Gb, we'll eventually fail to write to the /initrd.image file when we hit that limit, unless O_LARGEFILE is set. Link: https://lkml.kernel.org/r/20240317221522.896040-1-jsperbeck@google.com Signed-off-by: John Sperbeck Cc: Jens Axboe Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- init/initramfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/initramfs.c b/init/initramfs.c index 2f5bfb7d76521..7b915170789da 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -680,7 +680,7 @@ static void __init populate_initrd_image(char *err) printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; -- GitLab From ca8106fffc11cae54adbc3b2f7c77cbeee4a58a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 24 Mar 2024 17:10:53 +0100 Subject: [PATCH 1328/1418] x86/efistub: Add missing boot_params for mixed mode compat entry commit d21f5a59ea773826cc489acb287811d690b703cc upstream. The pure EFI stub entry point does not take a struct boot_params from the boot loader, but creates it from scratch, and populates only the fields that still have meaning in this context (command line, initrd base and size, etc) The original mixed mode implementation used the EFI handover protocol instead, where the boot loader (i.e., GRUB) populates a boot_params struct and passes it to a special Linux specific EFI entry point that takes the boot_params pointer as its third argument. When the new mixed mode implementation was introduced, using a special 32-bit PE entrypoint in the 64-bit kernel, it adopted the pure approach, and relied on the EFI stub to create the struct boot_params. This is preferred because it makes the bootloader side much easier to implement, as it does not need any x86-specific knowledge on how struct boot_params and struct setup_header are put together. This mixed mode implementation was adopted by systemd-boot version 252 and later. When commit e2ab9eab324c ("x86/boot/compressed: Move 32-bit entrypoint code into .text section") refactored this code and moved it out of head_64.S, the fact that ESI was populated with the address of the base of the image was overlooked, and to simplify the code flow, ESI is now zeroed and stored to memory unconditionally in shared code, so that the NULL-ness of that variable can still be used later to determine which mixed mode boot protocol is in use. With ESI pointing to the base of the image, it can serve as a struct boot_params pointer for startup_32(), which only accesses the init_data and kernel_alignment fields (and the scratch field as a temporary stack). Zeroing ESI means that those accesses produce garbage now, even though things appear to work if the first page of memory happens to be zeroed, and the region right before LOAD_PHYSICAL_ADDR (== 16 MiB) happens to be free. The solution is to pass a special, temporary struct boot_params to startup_32() via ESI, one that is sufficient for getting it to create the page tables correctly and is discarded right after. This involves setting a minimal alignment of 4k, only to get the statically allocated page tables line up correctly, and setting init_size to the executable image size (_end - startup_32). This ensures that the page tables are covered by the static footprint of the PE image. Given that EFI boot no longer calls the decompressor and no longer pads the image to permit the decompressor to execute in place, the same temporary struct boot_params should be used in the EFI handover protocol based mixed mode implementation as well, to prevent the page tables from being placed outside of allocated memory. Fixes: e2ab9eab324c ("x86/boot/compressed: Move 32-bit entrypoint code into .text section") Cc: # v6.1+ Closes: https://lore.kernel.org/all/20240321150510.GI8211@craftyguy.net/ Reported-by: Clayton Craft Tested-by: Clayton Craft Tested-by: Hans de Goede Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 07873f269b7bd..fb6d60dcd6ed1 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include +#include #include #include #include #include +#include .code64 .text @@ -155,6 +157,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -169,6 +172,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -245,8 +249,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -272,9 +274,18 @@ SYM_FUNC_START_LOCAL(efi32_entry) leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -300,8 +311,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers -- GitLab From 6b226ae43d82a43819d60fea0d66772ed1b88e77 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 26 Mar 2024 11:15:25 +0100 Subject: [PATCH 1329/1418] efi/libstub: Cast away type warning in use of max() commit 61d130f261a3c15ae2c4b6f3ac3517d5d5b78855 upstream. Avoid a type mismatch warning in max() by switching to max_t() and providing the type explicitly. Fixes: 3cb4a4827596abc82e ("efi/libstub: fix efi_random_alloc() ...") Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/randomalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 0d7b11b55ff31..fff826f56728c 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -119,7 +119,7 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; + target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, -- GitLab From a321a9907c0e02d8314f6b3060bcaa177edc105c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 21 Feb 2024 07:35:52 -0800 Subject: [PATCH 1330/1418] btrfs: zoned: don't skip block groups with 100% zone unusable commit a8b70c7f8600bc77d03c0b032c0662259b9e615e upstream. Commit f4a9f219411f ("btrfs: do not delete unused block group if it may be used soon") changed the behaviour of deleting unused block-groups on zoned filesystems. Starting with this commit, we're using btrfs_space_info_used() to calculate the number of used bytes in a space_info. But btrfs_space_info_used() also accounts btrfs_space_info::bytes_zone_unusable as used bytes. So if a block group is 100% zone_unusable it is skipped from the deletion step. In order not to skip fully zone_unusable block-groups, also check if the block-group has bytes left that can be used on a zoned filesystem. Fixes: f4a9f219411f ("btrfs: do not delete unused block group if it may be used soon") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9f77565bd7f5a..5993b627be580 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1413,7 +1413,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used) { + if (space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the -- GitLab From d7387bcb7781f5ede130fec511d4ef5c415c34d4 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 26 Feb 2024 16:39:13 +0100 Subject: [PATCH 1331/1418] btrfs: zoned: use zone aware sb location for scrub commit 74098a989b9c3370f768140b7783a7aaec2759b3 upstream. At the moment scrub_supers() doesn't grab the super block's location via the zoned device aware btrfs_sb_log_location() but via btrfs_sb_offset(). This leads to checksum errors on 'scrub' as we're not accessing the correct location of the super block. So use btrfs_sb_log_location() for getting the super blocks location on scrub. Reported-by: WA AM Link: http://lore.kernel.org/linux-btrfs/CANU2Z0EvUzfYxczLgGUiREoMndE9WdQnbaawV5Fv5gNXptPUKw@mail.gmail.com CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/scrub.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1672d4846baaf..12a2b1e3f1e35 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4177,7 +4177,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, gen = fs_info->last_trans_committed; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); + ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); + if (ret == -ENOENT) + break; + + if (ret) { + spin_lock(&sctx->stat_lock); + sctx->stat.super_errors++; + spin_unlock(&sctx->stat_lock); + continue; + } + if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; -- GitLab From 7eeabcea79b67cc29563e6a9a5c81f9e2c664d5b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 16 Mar 2024 08:43:36 +0100 Subject: [PATCH 1332/1418] wifi: mac80211: check/clear fast rx for non-4addr sta VLAN changes commit 4f2bdb3c5e3189297e156b3ff84b140423d64685 upstream. When moving a station out of a VLAN and deleting the VLAN afterwards, the fast_rx entry still holds a pointer to the VLAN's netdev, which can cause use-after-free bugs. Fix this by immediately calling ieee80211_check_fast_rx after the VLAN change. Cc: stable@vger.kernel.org Reported-by: ranygh@riseup.net Signed-off-by: Felix Fietkau Link: https://msgid.link/20240316074336.40442-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6cf0b77839d1d..1e57027da2913 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2075,15 +2075,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { - ieee80211_clear_fast_rx(sta); + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; + ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { -- GitLab From 7aa70c492272aa003cb52288e75bc255ec182157 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2024 10:10:20 +0200 Subject: [PATCH 1333/1418] wifi: iwlwifi: fw: don't always use FW dump trig commit 045a5b645dd59929b0e05375f493cde3a0318271 upstream. Since the dump_data (struct iwl_fwrt_dump_data) is a union, it's not safe to unconditionally access and use the 'trig' member, it might be 'desc' instead. Access it only if it's known to be 'trig' rather than 'desc', i.e. if ini-debug is present. Cc: stable@vger.kernel.org Fixes: 0eb50c674a1e ("iwlwifi: yoyo: send hcmd to fw after dump collection completes.") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240319100755.e2976bc58b29.I72fbd6135b3623227de53d8a2bb82776066cb72b@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 3ef0b776b7727..3b0ed1cdfa11e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2903,8 +2903,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) struct iwl_fw_dbg_params params = {0}; struct iwl_fwrt_dump_data *dump_data = &fwrt->dump.wks[wk_idx].dump_data; - u32 policy; - u32 time_point; if (!test_bit(wk_idx, &fwrt->dump.active_wks)) return; @@ -2935,13 +2933,16 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, false); - policy = le32_to_cpu(dump_data->trig->apply_policy); - time_point = le32_to_cpu(dump_data->trig->time_point); + if (iwl_trans_dbg_ini_valid(fwrt->trans)) { + u32 policy = le32_to_cpu(dump_data->trig->apply_policy); + u32 time_point = le32_to_cpu(dump_data->trig->time_point); - if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { - IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); - iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { + IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); + iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + } } + if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) iwl_force_nmi(fwrt->trans); -- GitLab From 4e79b4a64d087cb5723d0ee947e8996da9e82363 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 20 Mar 2024 11:26:07 -0700 Subject: [PATCH 1334/1418] exec: Fix NOMMU linux_binprm::exec in transfer_args_to_stack() commit 2aea94ac14d1e0a8ae9e34febebe208213ba72f7 upstream. In NOMMU kernel the value of linux_binprm::p is the offset inside the temporary program arguments array maintained in separate pages in the linux_binprm::page. linux_binprm::exec being a copy of linux_binprm::p thus must be adjusted when that array is copied to the user stack. Without that adjustment the value passed by the NOMMU kernel to the ELF program in the AT_EXECFN entry of the aux array doesn't make any sense and it may break programs that try to access memory pointed to by that entry. Adjust linux_binprm::exec before the successful return from the transfer_args_to_stack(). Cc: Fixes: b6a2fea39318 ("mm: variable length argument support") Fixes: 5edc2a5123a7 ("binfmt_elf_fdpic: wire up AT_EXECFD, AT_EXECFN, AT_SECURE") Signed-off-by: Max Filippov Link: https://lore.kernel.org/r/20240320182607.1472887-1-jcmvbkbc@gmail.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/exec.c b/fs/exec.c index 39f7751c90fc3..b01434d6a512d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -896,6 +896,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm, goto out; } + bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: -- GitLab From 192058bb3e1b4fc429689f8e2b57630f3beb7ae2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 19 Mar 2024 17:37:46 -0700 Subject: [PATCH 1335/1418] hexagon: vmlinux.lds.S: handle attributes section commit 549aa9678a0b3981d4821bf244579d9937650562 upstream. After the linked LLVM change, the build fails with CONFIG_LD_ORPHAN_WARN_LEVEL="error", which happens with allmodconfig: ld.lld: error: vmlinux.a(init/main.o):(.hexagon.attributes) is being placed in '.hexagon.attributes' Handle the attributes section in a similar manner as arm and riscv by adding it after the primary ELF_DETAILS grouping in vmlinux.lds.S, which fixes the error. Link: https://lkml.kernel.org/r/20240319-hexagon-handle-attributes-section-vmlinux-lds-s-v1-1-59855dab8872@kernel.org Fixes: 113616ec5b64 ("hexagon: select ARCH_WANT_LD_ORPHAN_WARN") Link: https://github.com/llvm/llvm-project/commit/31f4b329c8234fab9afa59494d7f8bdaeaefeaad Signed-off-by: Nathan Chancellor Reviewed-by: Brian Cain Cc: Bill Wendling Cc: Justin Stitt Cc: Nick Desaulniers Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 57465bff1fe49..df7f349c8d4f3 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -64,6 +64,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } -- GitLab From a433300285d7e3c816a82bf6506d2b6398503be6 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Sat, 16 Mar 2024 00:44:44 +0100 Subject: [PATCH 1336/1418] mmc: sdhci-omap: re-tuning is needed after a pm transition to support emmc HS200 mode commit f9e2a5b00a35f2c064dc679808bc8db5cc779ed6 upstream. "PM runtime functions" was been added in sdhci-omap driver in commit f433e8aac6b9 ("mmc: sdhci-omap: Implement PM runtime functions") along with "card power off and enable aggressive PM" in commit 3edf588e7fe0 ("mmc: sdhci-omap: Allow SDIO card power off and enable aggressive PM"). Since then, the sdhci-omap driver doesn't work using mmc-hs200 mode due to the tuning values being lost during a pm transition. As for the sdhci_am654 driver, request a new tuning sequence before suspend (sdhci_omap_runtime_suspend()), otherwise the device will trigger cache flush error: mmc1: cache flush error -110 (ETIMEDOUT) mmc1: error -110 doing aggressive suspend followed by I/O errors produced by fdisk -l /dev/mmcblk1boot1: I/O error, dev mmcblk1boot0, sector 64384 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 I/O error, dev mmcblk1boot1, sector 64384 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 I/O error, dev mmcblk1boot1, sector 64384 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 Buffer I/O error on dev mmcblk1boot1, logical block 8048, async page read I/O error, dev mmcblk1boot0, sector 64384 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 Buffer I/O error on dev mmcblk1boot0, logical block 8048, async page read Don't re-tune if auto retuning is supported in HW (when SDHCI_TUNING_MODE_3 is available). Link: https://lore.kernel.org/all/2e5f1997-564c-44e4-b357-6343e0dae7ab@smile.fr Fixes: f433e8aac6b9 ("mmc: sdhci-omap: Implement PM runtime functions") Signed-off-by: Romain Naour Reviewed-by: Tony Lindgren Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240315234444.816978-1-romain.naour@smile.fr Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-omap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 033be559a7309..bfb7c8b96341c 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1442,6 +1442,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + if (host->tuning_mode != SDHCI_TUNING_MODE_3) + mmc_retune_needed(host->mmc); + if (omap_host->con != -EINVAL) sdhci_runtime_suspend_host(host); -- GitLab From 547f4afaced0901d1d4c4bd64415dc1223f5fff8 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Wed, 13 Mar 2024 15:37:43 +0200 Subject: [PATCH 1337/1418] mmc: core: Initialize mmc_blk_ioc_data commit 0cdfe5b0bf295c0dee97436a8ed13336933a0211 upstream. Commit 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") adds flags uint to struct mmc_blk_ioc_data, but it does not get initialized for RPMB ioctls which now fails. Let's fix this by always initializing the struct and flags to zero. Fixes: 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218587 Link: https://lore.kernel.org/all/20231129092535.3278-1-avri.altman@wdc.com/ Cc: stable@vger.kernel.org Signed-off-by: Mikko Rapeli Reviewed-by: Avri Altman Acked-by: Adrian Hunter Tested-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240313133744.2405325-1-mikko.rapeli@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4688a658d6a6d..4aaaaca5ae7ad 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -415,7 +415,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( struct mmc_blk_ioc_data *idata; int err; - idata = kmalloc(sizeof(*idata), GFP_KERNEL); + idata = kzalloc(sizeof(*idata), GFP_KERNEL); if (!idata) { err = -ENOMEM; goto out; -- GitLab From ad9cc5e9e53ab94aa0c7ac65d43be7eb208dcb55 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Wed, 13 Mar 2024 15:37:44 +0200 Subject: [PATCH 1338/1418] mmc: core: Avoid negative index with array access commit cf55a7acd1ed38afe43bba1c8a0935b51d1dc014 upstream. Commit 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") assigns prev_idata = idatas[i - 1], but doesn't check that the iterator i is greater than zero. Let's fix this by adding a check. Fixes: 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") Link: https://lore.kernel.org/all/20231129092535.3278-1-avri.altman@wdc.com/ Cc: stable@vger.kernel.org Signed-off-by: Mikko Rapeli Reviewed-by: Avri Altman Tested-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240313133744.2405325-2-mikko.rapeli@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4aaaaca5ae7ad..657772546b6b1 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -490,7 +490,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->flags & MMC_BLK_IOC_DROP) return 0; - if (idata->flags & MMC_BLK_IOC_SBC) + if (idata->flags & MMC_BLK_IOC_SBC && i > 0) prev_idata = idatas[i - 1]; /* -- GitLab From 244cb8200e3a99ed3c34bb16e627d477bfac93ba Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 28 Mar 2024 09:43:40 +0900 Subject: [PATCH 1339/1418] block: Do not force full zone append completion in req_bio_endio() commit 55251fbdf0146c252ceff146a1bb145546f3e034 upstream. This reverts commit 748dc0b65ec2b4b7b3dbd7befcc4a54fdcac7988. Partial zone append completions cannot be supported as there is no guarantees that the fragmented data will be written sequentially in the same manner as with a full command. Commit 748dc0b65ec2 ("block: fix partial zone append completion handling in req_bio_endio()") changed req_bio_endio() to always advance a partially failed BIO by its full length, but this can lead to incorrect accounting. So revert this change and let low level device drivers handle this case by always failing completely zone append operations. With this revert, users will still see an IO error for a partially completed zone append BIO. Fixes: 748dc0b65ec2 ("block: fix partial zone append completion handling in req_bio_endio()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240328004409.594888-2-dlemoal@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 07610505c1776..e1b12f3d54bd4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -761,16 +761,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. - * For such case, force the completed nbytes to be equal to - * the BIO size so that bio_advance() sets the BIO remaining - * size to 0 and we end up calling bio_endio() before returning. */ - if (bio->bi_iter.bi_size != nbytes) { + if (bio->bi_iter.bi_size != nbytes) bio->bi_status = BLK_STS_IOERR; - nbytes = bio->bi_iter.bi_size; - } else { + else bio->bi_iter.bi_sector = rq->__sector; - } } bio_advance(bio, nbytes); -- GitLab From 6fc218ccd534d7246adb5b3cae6cafbc5dcf7a3c Mon Sep 17 00:00:00 2001 From: Ye Zhang Date: Thu, 21 Mar 2024 18:21:00 +0800 Subject: [PATCH 1340/1418] thermal: devfreq_cooling: Fix perf state when calculate dfc res_util MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a26de34b3c77ae3a969654d94be49e433c947e3b upstream. The issue occurs when the devfreq cooling device uses the EM power model and the get_real_power() callback is provided by the driver. The EM power table is sorted ascending,can't index the table by cooling device state,so convert cooling state to performance state by dfc->max_state - dfc->capped_state. Fixes: 615510fe13bd ("thermal: devfreq_cooling: remove old power model and use EM") Cc: 5.11+ # 5.11+ Signed-off-by: Ye Zhang Reviewed-by: Dhruva Gole Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/devfreq_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 24b474925cd68..0b424bc8cadfd 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -201,7 +201,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd res = dfc->power_ops->get_real_power(df, power, freq, voltage); if (!res) { - state = dfc->capped_state; + state = dfc->max_state - dfc->capped_state; /* Convert EM power into milli-Watts first */ dfc->res_util = dfc->em_pd->table[state].power; -- GitLab From 9acfd8b083a0ffbd387566800d89f55058a68af2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 6 Mar 2024 13:01:04 +0800 Subject: [PATCH 1341/1418] nouveau/dmem: handle kcalloc() allocation failure commit 16e87fe23d4af6df920406494ced5c0f4354567b upstream. The kcalloc() in nouveau_dmem_evict_chunk() will return null if the physical memory has run out. As a result, if we dereference src_pfns, dst_pfns or dma_addrs, the null pointer dereference bugs will happen. Moreover, the GPU is going away. If the kcalloc() fails, we could not evict all pages mapping a chunk. So this patch adds a __GFP_NOFAIL flag in kcalloc(). Finally, as there is no need to have physically contiguous memory, this patch switches kcalloc() to kvcalloc() in order to avoid failing allocations. CC: # v6.1 Fixes: 249881232e14 ("nouveau/dmem: evict device private memory during release") Suggested-by: Danilo Krummrich Signed-off-by: Duoming Zhou Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240306050104.11259-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 20fe53815b20f..6ca4a46a82ee9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -379,9 +379,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) dma_addr_t *dma_addrs; struct nouveau_fence *fence; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); - dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -407,11 +407,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) migrate_device_pages(src_pfns, dst_pfns, npages); nouveau_dmem_fence_done(&fence); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); for (i = 0; i < npages; i++) dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kfree(dma_addrs); + kvfree(dma_addrs); } void -- GitLab From 46efbdbc95a30951c2579caf97b6df2ee2b3bef3 Mon Sep 17 00:00:00 2001 From: Claus Hansen Ries Date: Thu, 21 Mar 2024 13:08:59 +0000 Subject: [PATCH 1342/1418] net: ll_temac: platform_get_resource replaced by wrong function commit 3a38a829c8bc27d78552c28e582eb1d885d07d11 upstream. The function platform_get_resource was replaced with devm_platform_ioremap_resource_byname and is called using 0 as name. This eventually ends up in platform_get_resource_byname in the call stack, where it causes a null pointer in strcmp. if (type == resource_type(r) && !strcmp(r->name, name)) It should have been replaced with devm_platform_ioremap_resource. Fixes: bd69058f50d5 ("net: ll_temac: Use devm_platform_ioremap_resource_byname()") Signed-off-by: Claus Hansen Ries Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cca18f9c630a41c18487729770b492bb@terma.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 6bf5e341c3c11..08c45756b2181 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1445,7 +1445,7 @@ static int temac_probe(struct platform_device *pdev) } /* map device registers */ - lp->regs = devm_platform_ioremap_resource_byname(pdev, 0); + lp->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); return -ENOMEM; -- GitLab From 016119154981d81c9e8f2ea3f56b9e2b4ea14500 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 12 Mar 2024 10:35:12 +0100 Subject: [PATCH 1343/1418] drm/vmwgfx: Create debugfs ttm_resource_manager entry only if needed commit 4be9075fec0a639384ed19975634b662bfab938f upstream. The driver creates /sys/kernel/debug/dri/0/mob_ttm even when the corresponding ttm_resource_manager is not allocated. This leads to a crash when trying to read from this file. Add a check to create mob_ttm, system_mob_ttm, and gmr_ttm debug file only when the corresponding ttm_resource_manager is allocated. crash> bt PID: 3133409 TASK: ffff8fe4834a5000 CPU: 3 COMMAND: "grep" #0 [ffffb954506b3b20] machine_kexec at ffffffffb2a6bec3 #1 [ffffb954506b3b78] __crash_kexec at ffffffffb2bb598a #2 [ffffb954506b3c38] crash_kexec at ffffffffb2bb68c1 #3 [ffffb954506b3c50] oops_end at ffffffffb2a2a9b1 #4 [ffffb954506b3c70] no_context at ffffffffb2a7e913 #5 [ffffb954506b3cc8] __bad_area_nosemaphore at ffffffffb2a7ec8c #6 [ffffb954506b3d10] do_page_fault at ffffffffb2a7f887 #7 [ffffb954506b3d40] page_fault at ffffffffb360116e [exception RIP: ttm_resource_manager_debug+0x11] RIP: ffffffffc04afd11 RSP: ffffb954506b3df0 RFLAGS: 00010246 RAX: ffff8fe41a6d1200 RBX: 0000000000000000 RCX: 0000000000000940 RDX: 0000000000000000 RSI: ffffffffc04b4338 RDI: 0000000000000000 RBP: ffffb954506b3e08 R8: ffff8fee3ffad000 R9: 0000000000000000 R10: ffff8fe41a76a000 R11: 0000000000000001 R12: 00000000ffffffff R13: 0000000000000001 R14: ffff8fe5bb6f3900 R15: ffff8fe41a6d1200 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffffb954506b3e00] ttm_resource_manager_show at ffffffffc04afde7 [ttm] #9 [ffffb954506b3e30] seq_read at ffffffffb2d8f9f3 RIP: 00007f4c4eda8985 RSP: 00007ffdbba9e9f8 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 000000000037e000 RCX: 00007f4c4eda8985 RDX: 000000000037e000 RSI: 00007f4c41573000 RDI: 0000000000000003 RBP: 000000000037e000 R8: 0000000000000000 R9: 000000000037fe30 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4c41573000 R13: 0000000000000003 R14: 00007f4c41572010 R15: 0000000000000003 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b Signed-off-by: Jocelyn Falempe Fixes: af4a25bbe5e7 ("drm/vmwgfx: Add debugfs entries for various ttm resource managers") Cc: Reviewed-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240312093551.196609-1-jfalempe@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b909a3ce9af3c..9d7a1b710f48f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1429,12 +1429,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw) root, "system_ttm"); ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM), root, "vram_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), - root, "gmr_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), - root, "mob_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), - root, "system_mob_ttm"); + if (vmw->has_gmr) + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), + root, "gmr_ttm"); + if (vmw->has_mob) { + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), + root, "mob_ttm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), + root, "system_mob_ttm"); + } } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, -- GitLab From b691954c94dbbe63c0aa9241cc516985aa59df05 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 20 Mar 2024 15:53:47 -0400 Subject: [PATCH 1344/1418] drm/amdkfd: fix TLB flush after unmap for GFX9.4.2 commit 1210e2f1033dc56b666c9f6dfb761a2d3f9f5d6c upstream. TLB flush after unmap accidentially was removed on gfx9.4.2. It is to add it back. Signed-off-by: Eric Huang Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ec8a576ac5a9e..3c7d267f2a07b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1349,7 +1349,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } -- GitLab From 72e4d3fb72e9f0f016946158a7d95304832768e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 19 Mar 2024 11:24:42 +0200 Subject: [PATCH 1345/1418] drm/i915/bios: Tolerate devdata==NULL in intel_bios_encoder_supports_dp_dual_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 32e39bab59934bfd3f37097d4dd85ac5eb0fd549 upstream. If we have no VBT, or the VBT didn't declare the encoder in question, we won't have the 'devdata' for the encoder. Instead of oopsing just bail early. We won't be able to tell whether the port is DP++ or not, but so be it. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10464 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240319092443.15769-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 26410896206342c8a80d2b027923e9ee7d33b733) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_bios.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index a70b7061742a8..9cc1ef2ca72cc 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3413,6 +3413,9 @@ static bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_enc { const struct child_device_config *child = &devdata->child; + if (!devdata) + return false; + if (!intel_bios_encoder_supports_dp(devdata) || !intel_bios_encoder_supports_hdmi(devdata)) return false; -- GitLab From 7eab7b021835ae422c38b968d5cc60e99408fb62 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Mar 2024 14:58:47 +0100 Subject: [PATCH 1346/1418] drm/i915/gt: Reset queue_priority_hint on parking commit 4a3859ea5240365d21f6053ee219bb240d520895 upstream. Originally, with strict in order execution, we could complete execution only when the queue was empty. Preempt-to-busy allows replacement of an active request that may complete before the preemption is processed by HW. If that happens, the request is retired from the queue, but the queue_priority_hint remains set, preventing direct submission until after the next CS interrupt is processed. This preempt-to-busy race can be triggered by the heartbeat, which will also act as the power-management barrier and upon completion allow us to idle the HW. We may process the completion of the heartbeat, and begin parking the engine before the CS event that restores the queue_priority_hint, causing us to fail the assertion that it is MIN. <3>[ 166.210729] __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 166.210781] Dumping ftrace buffer: <0>[ 166.210795] --------------------------------- ... <0>[ 167.302811] drm_fdin-1097 2..s1. 165741070us : trace_ports: 0000:00:02.0 rcs0: promote { ccid:20 1217:2 prio 0 } <0>[ 167.302861] drm_fdin-1097 2d.s2. 165741072us : execlists_submission_tasklet: 0000:00:02.0 rcs0: preempting last=1217:2, prio=0, hint=2147483646 <0>[ 167.302928] drm_fdin-1097 2d.s2. 165741072us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 1217:2, current 0 <0>[ 167.302992] drm_fdin-1097 2d.s2. 165741073us : __i915_request_submit: 0000:00:02.0 rcs0: fence 3:4660, current 4659 <0>[ 167.303044] drm_fdin-1097 2d.s1. 165741076us : execlists_submission_tasklet: 0000:00:02.0 rcs0: context:3 schedule-in, ccid:40 <0>[ 167.303095] drm_fdin-1097 2d.s1. 165741077us : trace_ports: 0000:00:02.0 rcs0: submit { ccid:40 3:4660* prio 2147483646 } <0>[ 167.303159] kworker/-89 11..... 165741139us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence c90:2, current 2 <0>[ 167.303208] kworker/-89 11..... 165741148us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:c90 unpin <0>[ 167.303272] kworker/-89 11..... 165741159us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 1217:2, current 2 <0>[ 167.303321] kworker/-89 11..... 165741166us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:1217 unpin <0>[ 167.303384] kworker/-89 11..... 165741170us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 3:4660, current 4660 <0>[ 167.303434] kworker/-89 11d..1. 165741172us : __intel_context_retire: 0000:00:02.0 rcs0: context:1216 retire runtime: { total:56028ns, avg:56028ns } <0>[ 167.303484] kworker/-89 11..... 165741198us : __engine_park: 0000:00:02.0 rcs0: parked <0>[ 167.303534] -0 5d.H3. 165741207us : execlists_irq_handler: 0000:00:02.0 rcs0: semaphore yield: 00000040 <0>[ 167.303583] kworker/-89 11..... 165741397us : __intel_context_retire: 0000:00:02.0 rcs0: context:1217 retire runtime: { total:325575ns, avg:0ns } <0>[ 167.303756] kworker/-89 11..... 165741777us : __intel_context_retire: 0000:00:02.0 rcs0: context:c90 retire runtime: { total:0ns, avg:0ns } <0>[ 167.303806] kworker/-89 11..... 165742017us : __engine_park: __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 167.303811] --------------------------------- <4>[ 167.304722] ------------[ cut here ]------------ <2>[ 167.304725] kernel BUG at drivers/gpu/drm/i915/gt/intel_engine_pm.c:283! <4>[ 167.304731] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI <4>[ 167.304734] CPU: 11 PID: 89 Comm: kworker/11:1 Tainted: G W 6.8.0-rc2-CI_DRM_14193-gc655e0fd2804+ #1 <4>[ 167.304736] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 <4>[ 167.304738] Workqueue: i915-unordered retire_work_handler [i915] <4>[ 167.304839] RIP: 0010:__engine_park+0x3fd/0x680 [i915] <4>[ 167.304937] Code: 00 48 c7 c2 b0 e5 86 a0 48 8d 3d 00 00 00 00 e8 79 48 d4 e0 bf 01 00 00 00 e8 ef 0a d4 e0 31 f6 bf 09 00 00 00 e8 03 49 c0 e0 <0f> 0b 0f 0b be 01 00 00 00 e8 f5 61 fd ff 31 c0 e9 34 fd ff ff 48 <4>[ 167.304940] RSP: 0018:ffffc9000059fce0 EFLAGS: 00010246 <4>[ 167.304942] RAX: 0000000000000200 RBX: 0000000000000000 RCX: 0000000000000006 <4>[ 167.304944] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 <4>[ 167.304946] RBP: ffff8881330ca1b0 R08: 0000000000000001 R09: 0000000000000001 <4>[ 167.304947] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881330ca000 <4>[ 167.304948] R13: ffff888110f02aa0 R14: ffff88812d1d0205 R15: ffff88811277d4f0 <4>[ 167.304950] FS: 0000000000000000(0000) GS:ffff88844f780000(0000) knlGS:0000000000000000 <4>[ 167.304952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 167.304953] CR2: 00007fc362200c40 CR3: 000000013306e003 CR4: 0000000000770ef0 <4>[ 167.304955] PKRU: 55555554 <4>[ 167.304957] Call Trace: <4>[ 167.304958] <4>[ 167.305573] ____intel_wakeref_put_last+0x1d/0x80 [i915] <4>[ 167.305685] i915_request_retire.part.0+0x34f/0x600 [i915] <4>[ 167.305800] retire_requests+0x51/0x80 [i915] <4>[ 167.305892] intel_gt_retire_requests_timeout+0x27f/0x700 [i915] <4>[ 167.305985] process_scheduled_works+0x2db/0x530 <4>[ 167.305990] worker_thread+0x18c/0x350 <4>[ 167.305993] kthread+0xfe/0x130 <4>[ 167.305997] ret_from_fork+0x2c/0x50 <4>[ 167.306001] ret_from_fork_asm+0x1b/0x30 <4>[ 167.306004] It is necessary for the queue_priority_hint to be lower than the next request submission upon waking up, as we rely on the hint to decide when to kick the tasklet to submit that first request. Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Closes: https://gitlab.freedesktop.org/drm/intel/issues/10154 Signed-off-by: Chris Wilson Signed-off-by: Janusz Krzysztofik Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240318135906.716055-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 98850e96cf811dc2d0a7d0af491caff9f5d49c1e) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee9..feb0fc32a19ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -253,9 +253,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index f903ee1ce06e7..eae138b9f2df3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3270,6 +3270,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) -- GitLab From fa2b938438cd217a9f937f0e12bf04470930a9ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 26 Mar 2024 12:43:17 -0400 Subject: [PATCH 1347/1418] Bluetooth: hci_sync: Fix not checking error on hci_cmd_sync_cancel_sync commit 1c3366abdbe884be62e5a7502b4db758aa3974c6 upstream. hci_cmd_sync_cancel_sync shall check the error passed to it since it will be propagated using req_result which is __u32 it needs to be properly set to a positive value if it was passed as negative othertise IS_ERR will not trigger as -(errno) would be converted to a positive value. Fixes: 63298d6e752f ("Bluetooth: hci_core: Cancel request on command timeout") Signed-off-by: Luiz Augusto von Dentz Reported-and-tested-by: Thorsten Leemhuis Closes: https://lore.kernel.org/all/08275279-7462-4f4a-a0ee-8aa015f829bc@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 6 +++--- net/bluetooth/hci_sync.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 70f24dc75b596..02e67ff05b7b4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2838,7 +2838,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); - hci_cmd_sync_cancel_sync(hdev, -err); + hci_cmd_sync_cancel_sync(hdev, err); } /* Suspend HCI device */ @@ -2858,7 +2858,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - hci_cancel_cmd_sync(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4169,7 +4169,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) err = hci_send_frame(hdev, skb); if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, err); + hci_cmd_sync_cancel_sync(hdev, -err); return; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 65b2ad34179f8..7e64cf880f9f1 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -678,7 +678,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; + /* req_result is __u32 so error must be positive to be properly + * propagated. + */ + hdev->req_result = err < 0 ? -err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); -- GitLab From 181f92abda617d406ceac7f9df1cc45152ca329b Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 14 Mar 2024 10:26:27 +0100 Subject: [PATCH 1348/1418] Revert "usb: phy: generic: Get the vbus supply" commit fdada0db0b2ae2addef4ccafe50937874dbeeebe upstream. This reverts commit 75fd6485cccef269ac9eb3b71cf56753341195ef. This patch was applied twice by accident, causing probe failures. Revert the accident. Signed-off-by: Alexander Stein Fixes: 75fd6485ccce ("usb: phy: generic: Get the vbus supply") Cc: stable Reviewed-by: Sean Anderson Link: https://lore.kernel.org/r/20240314092628.1869414-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 953df04b40d40..3dc5c04e7cbf9 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -265,13 +265,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); - if (PTR_ERR(nop->vbus_draw) == -ENODEV) - nop->vbus_draw = NULL; - if (IS_ERR(nop->vbus_draw)) - return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), - "could not get vbus regulator\n"); - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); if (PTR_ERR(nop->vbus_draw) == -ENODEV) nop->vbus_draw = NULL; -- GitLab From 916cd2fcbc1e344bcabf4b2a834cdf5a0417d30c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Mar 2024 12:50:48 +0100 Subject: [PATCH 1349/1418] usb: cdc-wdm: close race between read and workqueue commit 339f83612f3a569b194680768b22bf113c26a29d upstream. wdm_read() cannot race with itself. However, in service_outstanding_interrupt() it can race with the workqueue, which can be triggered by error handling. Hence we need to make sure that the WDM_RESPONDING flag is not just only set but tested. Fixes: afba937e540c9 ("USB: CDC WDM driver") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20240314115132.3907-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 1f0951be15ab7..fdc1a66b129a4 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -485,6 +485,7 @@ out_free_mem: static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; + int used; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) @@ -499,7 +500,10 @@ static int service_outstanding_interrupt(struct wdm_device *desc) goto out; } - set_bit(WDM_RESPONDING, &desc->flags); + used = test_and_set_bit(WDM_RESPONDING, &desc->flags); + if (used) + goto out; + spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); -- GitLab From c9006b90ba6876c87cffc70d604549ab5c51749a Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Thu, 7 Mar 2024 02:08:14 +0800 Subject: [PATCH 1350/1418] USB: UAS: return ENODEV when submit urbs fail with device not attached commit cd5432c712351a3d5f82512908f5febfca946ca6 upstream. In the scenario of entering hibernation with udisk in the system, if the udisk was gone or resume fail in the thaw phase of hibernation. Its state will be set to NOTATTACHED. At this point, usb_hub_wq was already freezed and can't not handle disconnect event. Next, in the poweroff phase of hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk when poweroff this scsi device, which will cause uas_submit_urbs to be called to submit URB for sense/data/cmd pipe. However, these URBs will submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead the SCSI layer go into an ugly loop and system fail to go into hibernation. On the other hand, when we specially check for -ENODEV in function uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device poweroff fail and system shutdown instead of entering hibernation. To fix this issue, let uas_submit_urbs to return original generic error when submitting URB failed. At the same time, we need to translate -ENODEV to DID_NOT_CONNECT for the SCSI layer. Suggested-by: Oliver Neukum Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Link: https://lore.kernel.org/r/20240306180814.4897-1-WeitaoWang-oc@zhaoxin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index ed22053b3252f..af619efe8eabf 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, * daft to me. */ -static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) +static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) { struct uas_dev_info *devinfo = cmnd->device->hostdata; struct urb *urb; @@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) urb = uas_alloc_sense_urb(devinfo, gfp, cmnd); if (!urb) - return NULL; + return -ENOMEM; usb_anchor_urb(urb, &devinfo->sense_urbs); err = usb_submit_urb(urb, gfp); if (err) { usb_unanchor_urb(urb); uas_log_cmd_state(cmnd, "sense submit err", err); usb_free_urb(urb); - return NULL; } - return urb; + return err; } static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_dev_info *devinfo) { struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); - struct urb *urb; int err; lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { - urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC); - if (!urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + err = uas_submit_sense_urb(cmnd, GFP_ATOMIC); + if (err) + return err; cmdinfo->state &= ~SUBMIT_STATUS_URB; } @@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_FROM_DEVICE); if (!cmdinfo->data_in_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_IN_URB; } @@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_in_urb); uas_log_cmd_state(cmnd, "data in submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_IN_URB; cmdinfo->state |= DATA_IN_URB_INFLIGHT; @@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_TO_DEVICE); if (!cmdinfo->data_out_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_OUT_URB; } @@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_out_urb); uas_log_cmd_state(cmnd, "data out submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_OUT_URB; cmdinfo->state |= DATA_OUT_URB_INFLIGHT; @@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (cmdinfo->state & ALLOC_CMD_URB) { cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd); if (!cmdinfo->cmd_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_CMD_URB; } @@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->cmd_urb); uas_log_cmd_state(cmnd, "cmd submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->cmd_urb = NULL; cmdinfo->state &= ~SUBMIT_CMD_URB; @@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd) * of queueing, no matter how fatal the error */ if (err == -ENODEV) { - set_host_byte(cmnd, DID_ERROR); + set_host_byte(cmnd, DID_NO_CONNECT); scsi_done(cmnd); goto zombie; } -- GitLab From aa1d1ce1ad6b5ec94f91c51c0e5c161444825875 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 15 Jul 2023 16:07:26 +0200 Subject: [PATCH 1351/1418] usb: dwc3-am62: Rename private data [ Upstream commit 3609699c32aa4f2710a6fe2b21afc6a9a3c66bc5 ] Rename dwc3_data to dwc3_am62 to make it consistent with other glue drivers, it's clearer that this is am62's specific. While there, do the same for data variable. Signed-off-by: Ladislav Michl Link: https://lore.kernel.org/r/ZLKoHhJvT+Y6aM+C@lenoch Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 6661befe4100 ("usb: dwc3-am62: fix module unload/reload behavior") Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-am62.c | 80 ++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index 173cf3579c55d..726f96d257c8d 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -89,7 +89,7 @@ #define DWC3_AM62_AUTOSUSPEND_DELAY 100 -struct dwc3_data { +struct dwc3_am62 { struct device *dev; void __iomem *usbss; struct clk *usb2_refclk; @@ -115,19 +115,19 @@ static const int dwc3_ti_rate_table[] = { /* in KHZ */ 52000, }; -static inline u32 dwc3_ti_readl(struct dwc3_data *data, u32 offset) +static inline u32 dwc3_ti_readl(struct dwc3_am62 *am62, u32 offset) { - return readl((data->usbss) + offset); + return readl((am62->usbss) + offset); } -static inline void dwc3_ti_writel(struct dwc3_data *data, u32 offset, u32 value) +static inline void dwc3_ti_writel(struct dwc3_am62 *am62, u32 offset, u32 value) { - writel(value, (data->usbss) + offset); + writel(value, (am62->usbss) + offset); } -static int phy_syscon_pll_refclk(struct dwc3_data *data) +static int phy_syscon_pll_refclk(struct dwc3_am62 *am62) { - struct device *dev = data->dev; + struct device *dev = am62->dev; struct device_node *node = dev->of_node; struct of_phandle_args args; struct regmap *syscon; @@ -139,16 +139,16 @@ static int phy_syscon_pll_refclk(struct dwc3_data *data) return PTR_ERR(syscon); } - data->syscon = syscon; + am62->syscon = syscon; ret = of_parse_phandle_with_fixed_args(node, "ti,syscon-phy-pll-refclk", 1, 0, &args); if (ret) return ret; - data->offset = args.args[0]; + am62->offset = args.args[0]; - ret = regmap_update_bits(data->syscon, data->offset, PHY_PLL_REFCLK_MASK, data->rate_code); + ret = regmap_update_bits(am62->syscon, am62->offset, PHY_PLL_REFCLK_MASK, am62->rate_code); if (ret) { dev_err(dev, "failed to set phy pll reference clock rate\n"); return ret; @@ -161,32 +161,32 @@ static int dwc3_ti_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = pdev->dev.of_node; - struct dwc3_data *data; + struct dwc3_am62 *am62; int i, ret; unsigned long rate; u32 reg; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); - if (!data) + am62 = devm_kzalloc(dev, sizeof(*am62), GFP_KERNEL); + if (!am62) return -ENOMEM; - data->dev = dev; - platform_set_drvdata(pdev, data); + am62->dev = dev; + platform_set_drvdata(pdev, am62); - data->usbss = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(data->usbss)) { + am62->usbss = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(am62->usbss)) { dev_err(dev, "can't map IOMEM resource\n"); - return PTR_ERR(data->usbss); + return PTR_ERR(am62->usbss); } - data->usb2_refclk = devm_clk_get(dev, "ref"); - if (IS_ERR(data->usb2_refclk)) { + am62->usb2_refclk = devm_clk_get(dev, "ref"); + if (IS_ERR(am62->usb2_refclk)) { dev_err(dev, "can't get usb2_refclk\n"); - return PTR_ERR(data->usb2_refclk); + return PTR_ERR(am62->usb2_refclk); } /* Calculate the rate code */ - rate = clk_get_rate(data->usb2_refclk); + rate = clk_get_rate(am62->usb2_refclk); rate /= 1000; // To KHz for (i = 0; i < ARRAY_SIZE(dwc3_ti_rate_table); i++) { if (dwc3_ti_rate_table[i] == rate) @@ -198,20 +198,20 @@ static int dwc3_ti_probe(struct platform_device *pdev) return -EINVAL; } - data->rate_code = i; + am62->rate_code = i; /* Read the syscon property and set the rate code */ - ret = phy_syscon_pll_refclk(data); + ret = phy_syscon_pll_refclk(am62); if (ret) return ret; /* VBUS divider select */ - data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); - reg = dwc3_ti_readl(data, USBSS_PHY_CONFIG); - if (data->vbus_divider) + am62->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); + reg = dwc3_ti_readl(am62, USBSS_PHY_CONFIG); + if (am62->vbus_divider) reg |= 1 << USBSS_PHY_VBUS_SEL_SHIFT; - dwc3_ti_writel(data, USBSS_PHY_CONFIG, reg); + dwc3_ti_writel(am62, USBSS_PHY_CONFIG, reg); pm_runtime_set_active(dev); pm_runtime_enable(dev); @@ -219,7 +219,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) * Don't ignore its dependencies with its children */ pm_suspend_ignore_children(dev, false); - clk_prepare_enable(data->usb2_refclk); + clk_prepare_enable(am62->usb2_refclk); pm_runtime_get_noresume(dev); ret = of_platform_populate(node, NULL, NULL, dev); @@ -229,9 +229,9 @@ static int dwc3_ti_probe(struct platform_device *pdev) } /* Set mode valid bit to indicate role is valid */ - reg = dwc3_ti_readl(data, USBSS_MODE_CONTROL); + reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); reg |= USBSS_MODE_VALID; - dwc3_ti_writel(data, USBSS_MODE_CONTROL, reg); + dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); /* Setting up autosuspend */ pm_runtime_set_autosuspend_delay(dev, DWC3_AM62_AUTOSUSPEND_DELAY); @@ -241,7 +241,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) return 0; err_pm_disable: - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); return ret; @@ -258,18 +258,18 @@ static int dwc3_ti_remove_core(struct device *dev, void *c) static int dwc3_ti_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct dwc3_data *data = platform_get_drvdata(pdev); + struct dwc3_am62 *am62 = platform_get_drvdata(pdev); u32 reg; device_for_each_child(dev, NULL, dwc3_ti_remove_core); /* Clear mode valid bit */ - reg = dwc3_ti_readl(data, USBSS_MODE_CONTROL); + reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); reg &= ~USBSS_MODE_VALID; - dwc3_ti_writel(data, USBSS_MODE_CONTROL, reg); + dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); pm_runtime_put_sync(dev); - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); @@ -280,18 +280,18 @@ static int dwc3_ti_remove(struct platform_device *pdev) #ifdef CONFIG_PM static int dwc3_ti_suspend_common(struct device *dev) { - struct dwc3_data *data = dev_get_drvdata(dev); + struct dwc3_am62 *am62 = dev_get_drvdata(dev); - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); return 0; } static int dwc3_ti_resume_common(struct device *dev) { - struct dwc3_data *data = dev_get_drvdata(dev); + struct dwc3_am62 *am62 = dev_get_drvdata(dev); - clk_prepare_enable(data->usb2_refclk); + clk_prepare_enable(am62->usb2_refclk); return 0; } -- GitLab From 6c6a45645a2e6a272dfde14eddbb6706de63c25d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 27 Feb 2024 11:23:48 +0200 Subject: [PATCH 1352/1418] usb: dwc3-am62: fix module unload/reload behavior [ Upstream commit 6661befe41009c210efa2c1bcd16a5cc4cff8a06 ] As runtime PM is enabled, the module can be runtime suspended when .remove() is called. Do a pm_runtime_get_sync() to make sure module is active before doing any register operations. Doing a pm_runtime_put_sync() should disable the refclk so no need to disable it again. Fixes the below warning at module removel. [ 39.705310] ------------[ cut here ]------------ [ 39.710004] clk:162:3 already disabled [ 39.713941] WARNING: CPU: 0 PID: 921 at drivers/clk/clk.c:1090 clk_core_disable+0xb0/0xb8 We called of_platform_populate() in .probe() so call the cleanup function of_platform_depopulate() in .remove(). Get rid of the now unnnecessary dwc3_ti_remove_core(). Without this, module re-load doesn't work properly. Fixes: e8784c0aec03 ("drivers: usb: dwc3: Add AM62 USB wrapper driver") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Roger Quadros Link: https://lore.kernel.org/r/20240227-for-v6-9-am62-usb-errata-3-0-v4-1-0ada8ddb0767@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-am62.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index 726f96d257c8d..ad8a2eadb472b 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -247,21 +247,14 @@ err_pm_disable: return ret; } -static int dwc3_ti_remove_core(struct device *dev, void *c) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - return 0; -} - static int dwc3_ti_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct dwc3_am62 *am62 = platform_get_drvdata(pdev); u32 reg; - device_for_each_child(dev, NULL, dwc3_ti_remove_core); + pm_runtime_get_sync(dev); + of_platform_depopulate(dev); /* Clear mode valid bit */ reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); @@ -269,7 +262,6 @@ static int dwc3_ti_remove(struct platform_device *pdev) dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); pm_runtime_put_sync(dev); - clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); -- GitLab From 9d66ae0e7bb78b54e1e0525456c6b54e1d132046 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 26 Mar 2024 17:42:38 +0800 Subject: [PATCH 1353/1418] ALSA: sh: aica: reorder cleanup operations to avoid UAF bugs commit 051e0840ffa8ab25554d6b14b62c9ab9e4901457 upstream. The dreamcastcard->timer could schedule the spu_dma_work and the spu_dma_work could also arm the dreamcastcard->timer. When the snd_pcm_substream is closing, the aica_channel will be deallocated. But it could still be dereferenced in the worker thread. The reason is that del_timer() will return directly regardless of whether the timer handler is running or not and the worker could be rescheduled in the timer handler. As a result, the UAF bug will happen. The racy situation is shown below: (Thread 1) | (Thread 2) snd_aicapcm_pcm_close() | ... | run_spu_dma() //worker | mod_timer() flush_work() | del_timer() | aica_period_elapsed() //timer kfree(dreamcastcard->channel) | schedule_work() | run_spu_dma() //worker ... | dreamcastcard->channel-> //USE In order to mitigate this bug and other possible corner cases, call mod_timer() conditionally in run_spu_dma(), then implement PCM sync_stop op to cancel both the timer and worker. The sync_stop op will be called from PCM core appropriately when needed. Fixes: 198de43d758c ("[ALSA] Add ALSA support for the SEGA Dreamcast PCM device") Suggested-by: Takashi Iwai Signed-off-by: Duoming Zhou Message-ID: <20240326094238.95442-1-duoming@zju.edu.cn> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/sh/aica.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 6e9d6bd67369a..8b47bfcd90318 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work) dreamcastcard->clicks++; if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER)) dreamcastcard->clicks %= AICA_PERIOD_NUMBER; - mod_timer(&dreamcastcard->timer, jiffies + 1); + if (snd_pcm_running(dreamcastcard->substream)) + mod_timer(&dreamcastcard->timer, jiffies + 1); } } @@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t) /*timer function - so cannot sleep */ int play_period; struct snd_pcm_runtime *runtime; + if (!snd_pcm_running(substream)) + return; runtime = substream->runtime; dreamcastcard = substream->pcm->private_data; /* Have we played out an additional period? */ @@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream return 0; } +static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct snd_card_aica *dreamcastcard = substream->pcm->private_data; + + del_timer_sync(&dreamcastcard->timer); + cancel_work_sync(&dreamcastcard->spu_dma_work); + return 0; +} + static int snd_aicapcm_pcm_close(struct snd_pcm_substream *substream) { struct snd_card_aica *dreamcastcard = substream->pcm->private_data; - flush_work(&(dreamcastcard->spu_dma_work)); - del_timer(&dreamcastcard->timer); dreamcastcard->substream = NULL; kfree(dreamcastcard->channel); spu_disable(); @@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = { .prepare = snd_aicapcm_pcm_prepare, .trigger = snd_aicapcm_pcm_trigger, .pointer = snd_aicapcm_pcm_pointer, + .sync_stop = snd_aicapcm_pcm_sync_stop, }; /* TO DO: set up to handle more than one pcm instance */ -- GitLab From 3678cf67ff7136db1dd3bf63c361650db5d92889 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 13 Mar 2024 08:21:20 -0300 Subject: [PATCH 1354/1418] scsi: core: Fix unremoved procfs host directory regression commit f23a4d6e07570826fe95023ca1aa96a011fa9f84 upstream. Commit fc663711b944 ("scsi: core: Remove the /proc/scsi/${proc_name} directory earlier") fixed a bug related to modules loading/unloading, by adding a call to scsi_proc_hostdir_rm() on scsi_remove_host(). But that led to a potential duplicate call to the hostdir_rm() routine, since it's also called from scsi_host_dev_release(). That triggered a regression report, which was then fixed by commit be03df3d4bfe ("scsi: core: Fix a procfs host directory removal regression"). The fix just dropped the hostdir_rm() call from dev_release(). But it happens that this proc directory is created on scsi_host_alloc(), and that function "pairs" with scsi_host_dev_release(), while scsi_remove_host() pairs with scsi_add_host(). In other words, it seems the reason for removing the proc directory on dev_release() was meant to cover cases in which a SCSI host structure was allocated, but the call to scsi_add_host() didn't happen. And that pattern happens to exist in some error paths, for example. Syzkaller causes that by using USB raw gadget device, error'ing on usb-storage driver, at usb_stor_probe2(). By checking that path, we can see that the BadDevice label leads to a scsi_host_put() after a SCSI host allocation, but there's no call to scsi_add_host() in such path. That leads to messages like this in dmesg (and a leak of the SCSI host proc structure): usb-storage 4-1:87.51: USB Mass Storage device detected proc_dir_entry 'scsi/usb-storage' already registered WARNING: CPU: 1 PID: 3519 at fs/proc/generic.c:377 proc_register+0x347/0x4e0 fs/proc/generic.c:376 The proper fix seems to still call scsi_proc_hostdir_rm() on dev_release(), but guard that with the state check for SHOST_CREATED; there is even a comment in scsi_host_dev_release() detailing that: such conditional is meant for cases where the SCSI host was allocated but there was no calls to {add,remove}_host(), like the usb-storage case. This is what we propose here and with that, the error path of usb-storage does not trigger the warning anymore. Reported-by: syzbot+c645abf505ed21f931b5@syzkaller.appspotmail.com Fixes: be03df3d4bfe ("scsi: core: Fix a procfs host directory removal regression") Cc: stable@vger.kernel.org Cc: Bart Van Assche Cc: John Garry Cc: Shin'ichiro Kawasaki Signed-off-by: Guilherme G. Piccoli Link: https://lore.kernel.org/r/20240313113006.2834799-1-gpiccoli@igalia.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hosts.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8b825364baade..c785493b105c0 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev) if (shost->shost_state == SHOST_CREATED) { /* - * Free the shost_dev device name here if scsi_host_alloc() - * and scsi_host_put() have been called but neither + * Free the shost_dev device name and remove the proc host dir + * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_host_remove() has been called. * This avoids that the memory allocated for the shost_dev - * name is leaked. + * name as well as the proc dir structure are leaked. */ + scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } -- GitLab From 008bf3d622a9845f7f3363579b7f6851607996cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2024 17:36:56 +0100 Subject: [PATCH 1355/1418] staging: vc04_services: changen strncpy() to strscpy_pad() commit ef25725b7f8aaffd7756974d3246ec44fae0a5cf upstream. gcc-14 warns about this strncpy() that results in a non-terminated string for an overflow: In file included from include/linux/string.h:369, from drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c:20: In function 'strncpy', inlined from 'create_component' at drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c:940:2: include/linux/fortify-string.h:108:33: error: '__builtin_strncpy' specified bound 128 equals destination size [-Werror=stringop-truncation] Change it to strscpy_pad(), which produces a properly terminated and zero-padded string. Signed-off-by: Arnd Bergmann Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20240313163712.224585-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index cb921c94996a1..bda791ed8d5a4 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -938,8 +938,8 @@ static int create_component(struct vchiq_mmal_instance *instance, /* build component create message */ m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE; m.u.component_create.client_component = component->client_component; - strncpy(m.u.component_create.name, name, - sizeof(m.u.component_create.name)); + strscpy_pad(m.u.component_create.name, name, + sizeof(m.u.component_create.name)); ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), -- GitLab From 8416da2df7e0d5556da6f92e33ac9e498ca89aa3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Mar 2024 21:07:43 +0300 Subject: [PATCH 1356/1418] staging: vc04_services: fix information leak in create_component() commit f37e76abd614b68987abc8e5c22d986013349771 upstream. The m.u.component_create.pid field is for debugging and in the mainline kernel it's not used anything. However, it still needs to be set to something to prevent disclosing uninitialized stack data. Set it to zero. Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Cc: stable Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/2d972847-9ebd-481b-b6f9-af390f5aabd3@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index bda791ed8d5a4..90eb4c5936f38 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -940,6 +940,7 @@ static int create_component(struct vchiq_mmal_instance *instance, m.u.component_create.client_component = component->client_component; strscpy_pad(m.u.component_create.name, name, sizeof(m.u.component_create.name)); + m.u.component_create.pid = 0; ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), -- GitLab From cd20a6e83ba019ac91f3a8ed8a68d3385779a3d0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2024 13:04:50 -0400 Subject: [PATCH 1357/1418] USB: core: Add hub_get() and hub_put() routines commit ee113b860aa169e9a4d2c167c95d0f1961c6e1b8 upstream. Create hub_get() and hub_put() routines to encapsulate the kref_get() and kref_put() calls in hub.c. The new routines will be used by the next patch in this series. Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/604da420-ae8a-4a9e-91a4-2d511ff404fb@rowland.harvard.edu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 23 ++++++++++++++++------- drivers/usb/core/hub.h | 2 ++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d960a56b760ec..b1fb04e5247c3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -123,7 +123,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -685,14 +684,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1060,7 +1059,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1308,7 +1307,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1724,6 +1723,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1772,7 +1781,7 @@ static void hub_disconnect(struct usb_interface *intf) onboard_hub_destroy_pdevs(&hub->onboard_hub_devs); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -5874,7 +5883,7 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index bc66205ca52c3..1085c72335d5c 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -118,6 +118,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, -- GitLab From 9dac54f08198147f5ec0ec52fcf1bc8ac899ac05 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2024 13:06:33 -0400 Subject: [PATCH 1358/1418] USB: core: Fix deadlock in port "disable" sysfs attribute commit f4d1960764d8a70318b02f15203a1be2b2554ca1 upstream. The show and store callback routines for the "disable" sysfs attribute file in port.c acquire the device lock for the port's parent hub device. This can cause problems if another process has locked the hub to remove it or change its configuration: Removing the hub or changing its configuration requires the hub interface to be removed, which requires the port device to be removed, and device_del() waits until all outstanding sysfs attribute callbacks for the ports have returned. The lock can't be released until then. But the disable_show() or disable_store() routine can't return until after it has acquired the lock. The resulting deadlock can be avoided by calling sysfs_break_active_protection(). This will cause the sysfs core not to wait for the attribute's callback routine to return, allowing the removal to proceed. The disadvantage is that after making this call, there is no guarantee that the hub structure won't be deallocated at any moment. To prevent this, we have to acquire a reference to it first by calling hub_get(). Signed-off-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/f7a8c135-a495-4ce6-bd49-405a45e7ea9a@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 17aef216cb501..e91fa567d08d2 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -28,11 +28,22 @@ static ssize_t disable_show(struct device *dev, u16 portstatus, unused; bool disabled; int rc; + struct kernfs_node *kn; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -42,9 +53,13 @@ static ssize_t disable_show(struct device *dev, usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); if (rc) return rc; @@ -62,15 +77,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, int port1 = port_dev->portnum; bool disabled; int rc; + struct kernfs_node *kn; rc = strtobool(buf, &disabled); if (rc) return rc; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -91,9 +117,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, if (!rc) rc = count; -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); return rc; } -- GitLab From 3e284e15b7f05ed1e74ebcdc5d9db6b6e78fcb17 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 19 Mar 2024 16:12:09 +0900 Subject: [PATCH 1359/1418] scsi: sd: Fix TCG OPAL unlock on system resume commit 0c76106cb97548810214def8ee22700bbbb90543 upstream. Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") introduced the manage_system_start_stop scsi_device flag to allow libata to indicate to the SCSI disk driver that nothing should be done when resuming a disk on system resume. This change turned the execution of sd_resume() into a no-op for ATA devices on system resume. While this solved deadlock issues during device resume, this change also wrongly removed the execution of opal_unlock_from_suspend(). As a result, devices with TCG OPAL locking enabled remain locked and inaccessible after a system resume from sleep. To fix this issue, introduce the SCSI driver resume method and implement it with the sd_resume() function calling opal_unlock_from_suspend(). The former sd_resume() function is renamed to sd_resume_common() and modified to call the new sd_resume() function. For non-ATA devices, this result in no functional changes. In order for libata to explicitly execute sd_resume() when a device is resumed during system restart, the function scsi_resume_device() is introduced. libata calls this function from the revalidation work executed on devie resume, a state that is indicated with the new device flag ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked on resume, allowing normal operation. Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538 Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 5 ++++- drivers/ata/libata-scsi.c | 9 +++++++++ drivers/scsi/scsi_scan.c | 34 ++++++++++++++++++++++++++++++++++ drivers/scsi/sd.c | 25 +++++++++++++++++++++---- include/linux/libata.h | 1 + include/scsi/scsi_driver.h | 1 + include/scsi/scsi_host.h | 1 + 7 files changed, 71 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 1eaaf01418ea7..b8034d194078d 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -711,8 +711,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3089,6 +3091,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a9da2f05e6297..a09548630fc8b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4652,6 +4652,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4673,7 +4674,15 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index bab00b65bc9d1..852d509b19b2b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1611,6 +1611,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4433b02c8935f..c793bca882236 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -110,6 +110,7 @@ static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); static int sd_resume_system(struct device *); static int sd_resume_runtime(struct device *); +static int sd_resume(struct device *); static void sd_rescan(struct device *); static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt); @@ -691,6 +692,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, @@ -3830,7 +3832,22 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + if (sdkp->device->no_start_on_resume) + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; @@ -3849,7 +3866,7 @@ static int sd_resume(struct device *dev, bool runtime) } if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -3868,7 +3885,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -3892,7 +3909,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } /** diff --git a/include/linux/libata.h b/include/linux/libata.h index 45910aebc3778..6645259be1438 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -102,6 +102,7 @@ enum { ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba01..f40915d2eceef 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 71def41b1ad78..149b63e3534ad 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -752,6 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); -- GitLab From 4a8a42e16a5e8b376d7aa8a2d732f3e21512003c Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:21:21 +0000 Subject: [PATCH 1360/1418] usb: dwc2: host: Fix remote wakeup from hibernation commit bae2bc73a59c200db53b6c15fb26bb758e2c6108 upstream. Starting from core v4.30a changed order of programming GPWRDN_PMUACTV to 0 in case of exit from hibernation on remote wakeup signaling from device. Fixes: c5c403dc4336 ("usb: dwc2: Add host/device hibernation functions") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/99385ec55ce73445b6fbd0f471c9bd40eb1c9b9e.1708939799.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 1 + drivers/usb/dwc2/hcd.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 40cf2880d7e59..e18d9031c9953 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1084,6 +1084,7 @@ struct dwc2_hsotg { bool needs_byte_swap; /* DWC OTG HW Release versions */ +#define DWC2_CORE_REV_4_30a 0x4f54430a #define DWC2_CORE_REV_2_71a 0x4f54271a #define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 35c7a4df8e717..3b955b314199f 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5610,10 +5610,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, hr->hcfg, HCFG); /* De-assert Wakeup Logic */ - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn &= ~GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } hprt0 = hr->hprt0; hprt0 |= HPRT0_PWR; @@ -5638,6 +5640,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, hprt0 |= HPRT0_RES; dwc2_writel(hsotg, hprt0, HPRT0); + /* De-assert Wakeup Logic */ + if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } /* Wait for Resume time and then program HPRT again */ mdelay(100); hprt0 &= ~HPRT0_RES; -- GitLab From 3294928206812c1c6060bc27569566705eb034b6 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:21:11 +0000 Subject: [PATCH 1361/1418] usb: dwc2: host: Fix hibernation flow commit 3c7b9856a82227db01a20171d2e24c7ce305d59b upstream. Added to backup/restore registers HFLBADDR, HCCHARi, HCSPLTi, HCTSIZi, HCDMAi and HCDMABi. Fixes: 58e52ff6a6c3 ("usb: dwc2: Move register save and restore functions") Fixes: d17ee77b3044 ("usb: dwc2: add controller hibernation support") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/c2d10ee6098b9b009a8e94191e046004747d3bdd.1708945444.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 12 ++++++++++++ drivers/usb/dwc2/hcd.c | 18 ++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index e18d9031c9953..8c888b76abdf1 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -729,8 +729,14 @@ struct dwc2_dregs_backup { * struct dwc2_hregs_backup - Holds host registers state before * entering partial power down * @hcfg: Backup of HCFG register + * @hflbaddr: Backup of HFLBADDR register * @haintmsk: Backup of HAINTMSK register + * @hcchar: Backup of HCCHAR register + * @hcsplt: Backup of HCSPLT register * @hcintmsk: Backup of HCINTMSK register + * @hctsiz: Backup of HCTSIZ register + * @hdma: Backup of HCDMA register + * @hcdmab: Backup of HCDMAB register * @hprt0: Backup of HPTR0 register * @hfir: Backup of HFIR register * @hptxfsiz: Backup of HPTXFSIZ register @@ -738,8 +744,14 @@ struct dwc2_dregs_backup { */ struct dwc2_hregs_backup { u32 hcfg; + u32 hflbaddr; u32 haintmsk; + u32 hcchar[MAX_EPS_CHANNELS]; + u32 hcsplt[MAX_EPS_CHANNELS]; u32 hcintmsk[MAX_EPS_CHANNELS]; + u32 hctsiz[MAX_EPS_CHANNELS]; + u32 hcidma[MAX_EPS_CHANNELS]; + u32 hcidmab[MAX_EPS_CHANNELS]; u32 hprt0; u32 hfir; u32 hptxfsiz; diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 3b955b314199f..6de999c7513eb 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5406,9 +5406,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg) /* Backup Host regs */ hr = &hsotg->hr_backup; hr->hcfg = dwc2_readl(hsotg, HCFG); + hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR); hr->haintmsk = dwc2_readl(hsotg, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i)); + hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i)); hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i)); + hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i)); + hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i)); + hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i)); + } hr->hprt0 = dwc2_read_hprt0(hsotg); hr->hfir = dwc2_readl(hsotg, HFIR); @@ -5442,10 +5449,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) hr->valid = false; dwc2_writel(hsotg, hr->hcfg, HCFG); + dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR); dwc2_writel(hsotg, hr->haintmsk, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i)); + dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i)); dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i)); + dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i)); + dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i)); + dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i)); + } dwc2_writel(hsotg, hr->hprt0, HPRT0); dwc2_writel(hsotg, hr->hfir, HFIR); -- GitLab From bc48eb1b53ce977d17d51caa574bd81064a117a2 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:21:32 +0000 Subject: [PATCH 1362/1418] usb: dwc2: host: Fix ISOC flow in DDMA mode commit b258e42688501cadb1a6dd658d6f015df9f32d8f upstream. Fixed ISOC completion flow in DDMA mode. Added isoc descriptor actual length value and update urb's start_frame value. Fixed initialization of ISOC DMA descriptors flow. Fixes: 56f5b1cff22a ("staging: Core files for the DWC2 driver") Fixes: 20f2eb9c4cf8 ("staging: dwc2: add microframe scheduler from downstream Pi kernel") Fixes: c17b337c1ea4 ("usb: dwc2: host: program descriptor for next frame") Fixes: dc4c76e7b22c ("staging: HCD descriptor DMA support for the DWC2 driver") Fixes: 762d3a1a9cd7 ("usb: dwc2: host: process all completed urbs") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/a8b1e1711cc6cabfb45d92ede12e35445c66f06c.1708944698.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd.c | 12 ++++++++++-- drivers/usb/dwc2/hcd_ddma.c | 17 +++++++++++------ drivers/usb/dwc2/hw.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 6de999c7513eb..70b389125f635 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2701,8 +2701,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry); - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the periodic ready schedule to the @@ -2735,8 +2738,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the non-periodic inactive schedule to the @@ -4143,6 +4149,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd, urb->actual_length); if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + if (!hsotg->params.dma_desc_enable) + urb->start_frame = qtd->qh->start_active_frame; urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb); for (i = 0; i < urb->number_of_packets; ++i) { urb->iso_frame_desc[i].actual_length = diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 6b4d825e97a2d..79582b102c7ed 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg, idx = qh->td_last; inc = qh->host_interval; hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg); - cur_idx = dwc2_frame_list_idx(hsotg->frame_number); + cur_idx = idx; next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed); /* @@ -866,6 +866,8 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, { struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; + u16 frame_desc_idx; + struct urb *usb_urb = qtd->urb->priv; u16 remain = 0; int rc = 0; @@ -878,8 +880,11 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, DMA_FROM_DEVICE); dma_desc = &qh->desc_list[idx]; + frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1); - frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last]; + frame_desc = &qtd->urb->iso_descs[frame_desc_idx]; + if (idx == qtd->isoc_td_first) + usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg); dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset); if (chan->ep_is_in) remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >> @@ -900,7 +905,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, frame_desc->status = 0; } - if (++qtd->isoc_frame_index == qtd->urb->packet_count) { + if (++qtd->isoc_frame_index == usb_urb->number_of_packets) { /* * urb->status is not used for isoc transfers here. The * individual frame_desc status are used instead. @@ -1005,11 +1010,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg, return; idx = dwc2_desclist_idx_inc(idx, qh->host_interval, chan->speed); - if (!rc) + if (rc == 0) continue; - if (rc == DWC2_CMPL_DONE) - break; + if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP) + goto stop_scan; /* rc == DWC2_CMPL_STOP */ diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index 13abdd5f67529..12f8c7f86dc98 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -698,7 +698,7 @@ #define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) #define TXSTS_QTOP_TOKEN_SHIFT 25 #define TXSTS_QTOP_TERMINATE BIT(24) -#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_MASK (0x7f << 16) #define TXSTS_QSPCAVAIL_SHIFT 16 #define TXSTS_FSPCAVAIL_MASK (0xffff << 0) #define TXSTS_FSPCAVAIL_SHIFT 0 -- GitLab From 74cdf12f8dd8d7e9d9b2cf605ed136df7c48a659 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:22:01 +0000 Subject: [PATCH 1363/1418] usb: dwc2: gadget: Fix exiting from clock gating commit 31f42da31417bec88158f3cf62d19db836217f1e upstream. Added exiting from the clock gating mode on USB Reset Detect interrupt if core in the clock gating mode. Added new condition to check core in clock gating mode or no. Fixes: 9b4965d77e11 ("usb: dwc2: Add exit clock gating from session request interrupt") Fixes: 5d240efddc7f ("usb: dwc2: Add exit clock gating from wakeup interrupt") Fixes: 16c729f90bdf ("usb: dwc2: Allow exit clock gating in urb enqueue") Fixes: 401411bbc4e6 ("usb: dwc2: Add exit clock gating before removing driver") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/cbcc2ccd37e89e339130797ed68ae4597db773ac.1708938774.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core_intr.c | 9 ++++++--- drivers/usb/dwc2/gadget.c | 6 ++++++ drivers/usb/dwc2/hcd.c | 2 +- drivers/usb/dwc2/platform.c | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 158ede7538548..f8426e3d2b19b 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } @@ -408,7 +409,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } else { /* Change to L0 state */ @@ -425,7 +427,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) } if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_host_exit_clock_gating(hsotg, 1); /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 8b15742d9e8aa..56157b91f5050 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3728,6 +3728,12 @@ irq_retry: if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2) dwc2_exit_partial_power_down(hsotg, 0, true); + /* Exit gadget mode clock gating. */ + if (hsotg->params.power_down == + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + hsotg->lx_state = DWC2_L0; } diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 70b389125f635..dd5b1c5691e11 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -4657,7 +4657,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 58f53faab340f..2e4c6884f36a4 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -298,7 +298,7 @@ static int dwc2_driver_remove(struct platform_device *dev) /* Exit clock gating when driver is removed. */ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else -- GitLab From 19ca7ef7d83974574b0edc8894607d1544f157d6 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:22:13 +0000 Subject: [PATCH 1364/1418] usb: dwc2: gadget: LPM flow fix commit 5d69a3b54e5a630c90d82a4c2bdce3d53dc78710 upstream. Added functionality to exit from L1 state by device initiation using remote wakeup signaling, in case when function driver queuing request while core in L1 state. Fixes: 273d576c4d41 ("usb: dwc2: gadget: Add functionality to exit from LPM L1 state") Fixes: 88b02f2cb1e1 ("usb: dwc2: Add core state checking") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/b4d9de5382375dddbf7ef6049d9a82066ad87d5d.1710166393.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 1 + drivers/usb/dwc2/core_intr.c | 63 ++++++++++++++++++++++++------------ drivers/usb/dwc2/gadget.c | 4 +++ 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 8c888b76abdf1..b106c0e0b77ba 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1334,6 +1334,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg); void dwc2_enable_acg(struct dwc2_hsotg *hsotg); +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup); /* This function should be called on every hardware interrupt. */ irqreturn_t dwc2_handle_common_intr(int irq, void *dev); diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index f8426e3d2b19b..26d752a4c3ca9 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -323,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * @hsotg: Programming view of DWC_otg controller * */ -static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup) { u32 glpmcfg; - u32 i = 0; + u32 pcgctl; + u32 dctl; if (hsotg->lx_state != DWC2_L1) { dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n"); @@ -335,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) glpmcfg = dwc2_readl(hsotg, GLPMCFG); if (dwc2_is_device_mode(hsotg)) { - dev_dbg(hsotg->dev, "Exit from L1 state\n"); + dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup); glpmcfg &= ~GLPMCFG_ENBLSLPM; - glpmcfg &= ~GLPMCFG_HIRD_THRES_EN; + glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK; dwc2_writel(hsotg, glpmcfg, GLPMCFG); - do { - glpmcfg = dwc2_readl(hsotg, GLPMCFG); + pcgctl = dwc2_readl(hsotg, PCGCTL); + pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING; + dwc2_writel(hsotg, pcgctl, PCGCTL); - if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK | - GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS))) - break; + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_ENBESL) { + glpmcfg |= GLPMCFG_RSTRSLPSTS; + dwc2_writel(hsotg, glpmcfg, GLPMCFG); + } + + if (remotewakeup) { + if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__); + goto fail; + return; + } + + dctl = dwc2_readl(hsotg, DCTL); + dctl |= DCTL_RMTWKUPSIG; + dwc2_writel(hsotg, dctl, DCTL); - udelay(1); - } while (++i < 200); + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__); + goto fail; + return; + } + } - if (i == 200) { - dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n"); + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS || + glpmcfg & GLPMCFG_L1RESUMEOK) { + goto fail; return; } - dwc2_gadget_init_lpm(hsotg); + + /* Inform gadget to exit from L1 */ + call_gadget(hsotg, resume); + /* Change to L0 state */ + hsotg->lx_state = DWC2_L0; + hsotg->bus_suspended = false; +fail: dwc2_gadget_init_lpm(hsotg); } else { /* TODO */ dev_err(hsotg->dev, "Host side LPM is not supported.\n"); return; } - - /* Change to L0 state */ - hsotg->lx_state = DWC2_L0; - - /* Inform gadget to exit from L1 */ - call_gadget(hsotg, resume); } /* @@ -386,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state); if (hsotg->lx_state == DWC2_L1) { - dwc2_wakeup_from_lpm_l1(hsotg); + dwc2_wakeup_from_lpm_l1(hsotg, false); return; } diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 56157b91f5050..cb29f9fae2f23 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1416,6 +1416,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, ep->name, req, req->length, req->buf, req->no_interrupt, req->zero, req->short_not_ok); + if (hs->lx_state == DWC2_L1) { + dwc2_wakeup_from_lpm_l1(hs, true); + } + /* Prevent new request submission when controller is suspended */ if (hs->lx_state != DWC2_L0) { dev_dbg(hs->dev, "%s: submit request only in active state\n", -- GitLab From f74c5e0b54b02706d9a862ac6cddade30ac86bcf Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Fri, 15 Mar 2024 10:01:44 +0800 Subject: [PATCH 1365/1418] usb: udc: remove warning when queue disabled ep commit 2a587a035214fa1b5ef598aea0b81848c5b72e5e upstream. It is possible trigger below warning message from mass storage function, WARNING: CPU: 6 PID: 3839 at drivers/usb/gadget/udc/core.c:294 usb_ep_queue+0x7c/0x104 pc : usb_ep_queue+0x7c/0x104 lr : fsg_main_thread+0x494/0x1b3c Root cause is mass storage function try to queue request from main thread, but other thread may already disable ep when function disable. As there is no function failure in the driver, in order to avoid effort to fix warning, change WARN_ON_ONCE() in usb_ep_queue() to pr_debug(). Suggested-by: Alan Stern Cc: stable@vger.kernel.org Signed-off-by: yuan linyu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240315020144.2715575-1-yuanlinyu@hihonor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 0edd9e53fc5a1..82a10774a7ebc 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep, { int ret = 0; - if (WARN_ON_ONCE(!ep->enabled && ep->address)) { + if (!ep->enabled && ep->address) { + pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", + ep->address, ep->name); ret = -ESHUTDOWN; goto out; } -- GitLab From 2d28af770d4d19e440f56732d8895d30bef07fb0 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Tue, 19 Mar 2024 15:43:09 +0800 Subject: [PATCH 1366/1418] usb: typec: Return size of buffer if pd_set operation succeeds commit 53f5094fdf5deacd99b8655df692e9278506724d upstream. The attribute writing should return the number of bytes used from the buffer on success. Fixes: a7cff92f0635 ("usb: typec: USB Power Delivery helpers for ports and partners") Cc: stable@vger.kernel.org Signed-off-by: Kyle Tso Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240319074309.3306579-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 3da404d5178d3..ce83f558fe447 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1245,6 +1245,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, { struct typec_port *port = to_typec_port(dev); struct usb_power_delivery *pd; + int ret; if (!port->ops || !port->ops->pd_set) return -EOPNOTSUPP; @@ -1253,7 +1254,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, if (!pd) return -EINVAL; - return port->ops->pd_set(port, pd); + ret = port->ops->pd_set(port, pd); + if (ret) + return ret; + + return size; } static ssize_t select_usb_power_delivery_show(struct device *dev, -- GitLab From 959aacfe3ab655f537b861a194740a5212c40b35 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:22 +0100 Subject: [PATCH 1367/1418] usb: typec: ucsi: Clear EVENT_PENDING under PPM lock commit 15b2e71b4653b3e13df34695a29ebeee237c5af2 upstream. Suppose we sleep on the PPM lock after clearing the EVENT_PENDING bit because the thread for another connector is executing a command. In this case the command completion of the other command will still report the connector change for our connector. Clear the EVENT_PENDING bit under the PPM lock to avoid another useless call to ucsi_handle_connector_change() in this case. Fixes: c9aed03a0a68 ("usb: ucsi: Add missing ppm_lock") Cc: stable Signed-off-by: Christian A. Ehrhardt Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-2-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0695ee54ff781..1a3801991cd1c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -829,11 +829,11 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) ucsi_partner_task(con, ucsi_check_altmodes, 1, 0); - clear_bit(EVENT_PENDING, &con->ucsi->flags); - mutex_lock(&ucsi->ppm_lock); + clear_bit(EVENT_PENDING, &con->ucsi->flags); ret = ucsi_acknowledge_connector_change(ucsi); mutex_unlock(&ucsi->ppm_lock); + if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); -- GitLab From f8704d54c8e70b616fc6a8af9ee07d1dfd982746 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:24 +0100 Subject: [PATCH 1368/1418] usb: typec: ucsi: Ack unsupported commands commit 6b5c85ddeea77d18c4b69e3bda60e9374a20c304 upstream. If a command completes the OPM must send an ack. This applies to unsupported commands, too. Send the required ACK for unsupported commands. Signed-off-by: Christian A. Ehrhardt Cc: stable Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 1a3801991cd1c..57ba7c61e5af6 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -138,8 +138,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; - if (cci & UCSI_CCI_NOT_SUPPORTED) + if (cci & UCSI_CCI_NOT_SUPPORTED) { + if (ucsi_acknowledge_command(ucsi) < 0) + dev_err(ucsi->dev, + "ACK of unsupported command failed\n"); return -EOPNOTSUPP; + } if (cci & UCSI_CCI_ERROR) { if (cmd == UCSI_GET_ERROR_STATUS) -- GitLab From 12d0306cecf1a995619445117b3121617495eb75 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:25 +0100 Subject: [PATCH 1369/1418] usb: typec: ucsi_acpi: Refactor and fix DELL quirk commit 6aaceb7d9cd00f3e065dc4b054ecfe52c5253b03 upstream. Some DELL systems don't like UCSI_ACK_CC_CI commands with the UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE bit set. The current quirk still leaves room for races because it requires two consecutive ACK commands to be sent. Refactor and significantly simplify the quirk to fix this: Send a dummy command and bundle the connector change ack with the command completion ack in a single UCSI_ACK_CC_CI command. This removes the need to probe for the quirk. While there define flag bits for struct ucsi_acpi->flags in ucsi_acpi.c and don't re-use definitions from ucsi.h for struct ucsi->flags. Fixes: f3be347ea42d ("usb: ucsi_acpi: Quirk to ack a connector change ack cmd") Cc: stable@vger.kernel.org Signed-off-by: Christian A. Ehrhardt Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-5-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 75 +++++++++++++----------------- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 48130d636a020..b4d86d47c5db4 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -23,10 +23,11 @@ struct ucsi_acpi { void *base; struct completion complete; unsigned long flags; +#define UCSI_ACPI_SUPPRESS_EVENT 0 +#define UCSI_ACPI_COMMAND_PENDING 1 +#define UCSI_ACPI_ACK_PENDING 2 guid_t guid; u64 cmd; - bool dell_quirk_probed; - bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -79,9 +80,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, int ret; if (ack) - set_bit(ACK_PENDING, &ua->flags); + set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - set_bit(COMMAND_PENDING, &ua->flags); + set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -92,9 +93,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, out_clear_bit: if (ack) - clear_bit(ACK_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - clear_bit(COMMAND_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); return ret; } @@ -129,51 +130,40 @@ static const struct ucsi_operations ucsi_zenbook_ops = { }; /* - * Some Dell laptops expect that an ACK command with the - * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) - * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. - * If this is not done events are not delivered to OSPM and - * subsequent commands will timeout. + * Some Dell laptops don't like ACK commands with the + * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE + * bit set. To work around this send a dummy command and bundle the + * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE + * for the dummy command. */ static int ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - u64 cmd = *(u64 *)val, ack = 0; + u64 cmd = *(u64 *)val; + u64 dummycmd = UCSI_GET_CAPABILITY; int ret; - if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && - cmd & UCSI_ACK_CONNECTOR_CHANGE) - ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; - - ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); - if (ret != 0) - return ret; - if (ack == 0) - return ret; - - if (!ua->dell_quirk_probed) { - ua->dell_quirk_probed = true; - - cmd = UCSI_GET_CAPABILITY; - ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, - sizeof(cmd)); - if (ret == 0) - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, - &ack, sizeof(ack)); - if (ret != -ETIMEDOUT) + if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) { + cmd |= UCSI_ACK_COMMAND_COMPLETE; + + /* + * The UCSI core thinks it is sending a connector change ack + * and will accept new connector change events. We don't want + * this to happen for the dummy command as its response will + * still report the very event that the core is trying to clear. + */ + set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd, + sizeof(dummycmd)); + clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + + if (ret < 0) return ret; - - ua->dell_quirk_active = true; - dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); - dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); } - if (!ua->dell_quirk_active) - return ret; - - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); } static const struct ucsi_operations ucsi_dell_ops = { @@ -209,13 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (ret) return; - if (UCSI_CCI_CONNECTOR(cci)) + if (UCSI_CCI_CONNECTOR(cci) && + !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) complete(&ua->complete); if (cci & UCSI_CCI_COMMAND_COMPLETE && - test_bit(COMMAND_PENDING, &ua->flags)) + test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } -- GitLab From c223bc352cd34be66b59dfa8b62ba02ec4b78792 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:26 +0100 Subject: [PATCH 1370/1418] usb: typec: ucsi: Clear UCSI_CCI_RESET_COMPLETE before reset commit 3de4f996a0b5412aa451729008130a488f71563e upstream. Check the UCSI_CCI_RESET_COMPLETE complete flag before starting another reset. Use a UCSI_SET_NOTIFICATION_ENABLE command to clear the flag if it is set. Signed-off-by: Christian A. Ehrhardt Cc: stable Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-6-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 57ba7c61e5af6..98f335cbbcdea 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -878,13 +878,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard) static int ucsi_reset_ppm(struct ucsi *ucsi) { - u64 command = UCSI_PPM_RESET; + u64 command; unsigned long tmo; u32 cci; int ret; mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret < 0) + goto out; + + /* + * If UCSI_CCI_RESET_COMPLETE is already set we must clear + * the flag before we start another reset. Send a + * UCSI_SET_NOTIFICATION_ENABLE command to achieve this. + * Ignore a timeout and try the reset anyway if this fails. + */ + if (cci & UCSI_CCI_RESET_COMPLETE) { + command = UCSI_SET_NOTIFICATION_ENABLE; + ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, + sizeof(command)); + if (ret < 0) + goto out; + + tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); + do { + ret = ucsi->ops->read(ucsi, UCSI_CCI, + &cci, sizeof(cci)); + if (ret < 0) + goto out; + if (cci & UCSI_CCI_COMMAND_COMPLETE) + break; + if (time_is_before_jiffies(tmo)) + break; + msleep(20); + } while (1); + + WARN_ON(cci & UCSI_CCI_RESET_COMPLETE); + } + + command = UCSI_PPM_RESET; ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, sizeof(command)); if (ret < 0) -- GitLab From 3b9d72442adfbc9ddb0f76dd1b03977b3a578b16 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:17 +0530 Subject: [PATCH 1371/1418] scsi: qla2xxx: Prevent command send on chip reset commit 4895009c4bb72f71f2e682f1e7d2c2d96e482087 upstream. Currently IOCBs are allowed to push through while chip reset could be in progress. During chip reset the outstanding_cmds array is cleared twice. Once when any command on this array is returned as failed and secondly when the array is initialize to zero. If a command is inserted on to the array between these intervals, then the command will be lost. Check for chip reset before sending IOCB. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 8 ++++++-- drivers/scsi/qla2xxx/qla_iocb.c | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 884ed77259f85..f891f7e05d202 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1187,8 +1187,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* + * use qla24xx_async_gnl_sp_done to purge all pending gnl request. + * kref_put is call behind the scene. + */ + sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR; + qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR); fcport->flags &= ~(FCF_ASYNC_SENT); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9e524d52dc862..8b097cec830d7 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2588,6 +2588,33 @@ void qla2x00_sp_release(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct scsi_qla_host *vha = sp->vha; + + switch (sp->type) { + case SRB_CT_PTHRU_CMD: + /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */ + if (sp->u.iocb_cmd.u.ctarg.req && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + break; + default: + break; + } sp->free(sp); } @@ -2693,7 +2720,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { - kfree(fcport); + qla2x00_free_fcport(fcport); ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); return -ENOMEM; @@ -2748,6 +2775,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2757,6 +2785,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b.area, fcport->d_id.b.al_pa); wait_for_completion(&elsio->u.els_logo.comp); + qla2x00_free_fcport(fcport); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); @@ -3916,7 +3945,7 @@ qla2x00_start_sp(srb_t *sp) return -EAGAIN; } - pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp); if (!pkt) { rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, -- GitLab From 3cd58084e48ef5d6ed9d25714b5f9e563b846b04 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:18 +0530 Subject: [PATCH 1372/1418] scsi: qla2xxx: Fix N2N stuck connection commit 881eb861ca3877300570db10abbf11494e48548d upstream. Disk failed to rediscover after chip reset error injection. The chip reset happens at the time when a PLOGI is being sent. This causes a flag to be left on which blocks the retry. Clear the blocking flag. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_gbl.h | 2 +- drivers/scsi/qla2xxx/qla_iocb.c | 32 +++++++++++--------------------- drivers/scsi/qla2xxx/qla_os.c | 2 +- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 2e4537f9e5b50..73cd869caf609 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *); extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *); extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t); -extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool); +extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *); extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 8b097cec830d7..97b98ca09acb1 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3042,7 +3042,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, - fc_port_t *fcport, bool wait) + fc_port_t *fcport) { srb_t *sp; struct srb_iocb *elsio = NULL; @@ -3057,8 +3057,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!sp) { ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); - fcport->flags &= ~FCF_ASYNC_ACTIVE; - return -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -3067,9 +3066,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - if (wait) - sp->flags = SRB_WAKEUP_ON_COMP; - sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; @@ -3085,7 +3081,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_plogi_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } resp_ptr = elsio->u.els_plogi.els_resp_pyld = @@ -3094,7 +3090,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_resp_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr); @@ -3109,7 +3105,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) { struct fc_els_flogi *p = ptr; - p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC); } @@ -3118,10 +3113,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, sizeof(*elsio->u.els_plogi.els_plogi_pyld)); - init_completion(&elsio->u.els_plogi.comp); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_LOGIN_NEEDED; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + goto done_free_sp; } else { ql_dbg(ql_dbg_disc, vha, 0x3074, "%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n", @@ -3129,21 +3125,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b24, vha->d_id.b24); } - if (wait) { - wait_for_completion(&elsio->u.els_plogi.comp); - - if (elsio->u.els_plogi.comp_status != CS_COMPLETE) - rval = QLA_FUNCTION_FAILED; - } else { - goto done; - } + return rval; -out: - fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); +done_free_sp: qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); done: + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + qla2x00_set_fcport_disc_state(fcport, DSC_DELETED); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 25ca0544b9639..25d0c2bfdd742 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5562,7 +5562,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) break; case QLA_EVT_ELS_PLOGI: qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI, - e->u.fcport.fcport, false); + e->u.fcport.fcport); break; case QLA_EVT_SA_REPLACE: rc = qla24xx_issue_sa_replace_iocb(vha, e); -- GitLab From 14a3ca35c523fc8de16d72c1d93d37234cdf5962 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:19 +0530 Subject: [PATCH 1373/1418] scsi: qla2xxx: Split FCE|EFT trace control commit 76a192e1a566e15365704b9f8fb3b70825f85064 upstream. Current code combines the allocation of FCE|EFT trace buffers and enables the features all in 1 step. Split this step into separate steps in preparation for follow-on patch to allow user to have a choice to enable / disable FCE trace feature. Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-4-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 102 +++++++++++++------------------- 1 file changed, 41 insertions(+), 61 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f891f7e05d202..f0a11d42e1fd4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2670,6 +2670,40 @@ exit: return rval; } +static void qla_enable_fce_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->fce) { + ha->flags.fce_enabled = 1; + memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); + rval = qla2x00_enable_fce_trace(vha, + ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8033, + "Unable to reinitialize FCE (%d).\n", rval); + ha->flags.fce_enabled = 0; + } + } +} + +static void qla_enable_eft_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->eft) { + memset(ha->eft, 0, EFT_SIZE); + rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8034, + "Unable to reinitialize EFT (%d).\n", rval); + } + } +} /* * qla2x00_initialize_adapter * Initialize board. @@ -3673,9 +3707,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) } static void -qla2x00_init_fce_trace(scsi_qla_host_t *vha) +qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3704,27 +3737,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, - ha->fce_mb, &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x00bf, - "Unable to initialize FCE (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c0, "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); - ha->flags.fce_enabled = 1; ha->fce_dma = tc_dma; ha->fce = tc; + ha->fce_bufs = FCE_NUM_BUFFERS; } static void -qla2x00_init_eft_trace(scsi_qla_host_t *vha) +qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3749,14 +3772,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x00c2, - "Unable to initialize EFT (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c3, "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); @@ -3764,13 +3779,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) ha->eft = tc; } -static void -qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) -{ - qla2x00_init_fce_trace(vha); - qla2x00_init_eft_trace(vha); -} - void qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) { @@ -3825,10 +3833,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); - qla2x00_init_fce_trace(vha); + qla2x00_alloc_fce_trace(vha); if (ha->fce) fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; - qla2x00_init_eft_trace(vha); + qla2x00_alloc_eft_trace(vha); if (ha->eft) eft_size = EFT_SIZE; } @@ -4258,7 +4266,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; unsigned long flags; - uint16_t fw_major_version; int done_once = 0; if (IS_P3P_TYPE(ha)) { @@ -4325,7 +4332,6 @@ execute_fw_with_lr: goto failed; enable_82xx_npiv: - fw_major_version = ha->fw_major_version; if (IS_P3P_TYPE(ha)) qla82xx_check_md_needed(vha); else @@ -4354,12 +4360,11 @@ enable_82xx_npiv: if (rval != QLA_SUCCESS) goto failed; - if (!fw_major_version && !(IS_P3P_TYPE(ha))) - qla2x00_alloc_offload_mem(vha); - if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) qla2x00_alloc_fw_dump(vha); + qla_enable_fce_trace(vha); + qla_enable_eft_trace(vha); } else { goto failed; } @@ -7544,7 +7549,6 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) int qla2x00_abort_isp(scsi_qla_host_t *vha) { - int rval; uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *tvp; @@ -7638,31 +7642,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) qla2x00_get_fw_version(vha); - if (ha->fce) { - ha->flags.fce_enabled = 1; - memset(ha->fce, 0, - fce_calc_size(ha->fce_bufs)); - rval = qla2x00_enable_fce_trace(vha, - ha->fce_dma, ha->fce_bufs, ha->fce_mb, - &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x8033, - "Unable to reinitialize FCE " - "(%d).\n", rval); - ha->flags.fce_enabled = 0; - } - } - if (ha->eft) { - memset(ha->eft, 0, EFT_SIZE); - rval = qla2x00_enable_eft_trace(vha, - ha->eft_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x8034, - "Unable to reinitialize EFT " - "(%d).\n", rval); - } - } } else { /* failed the ISP abort */ vha->flags.online = 1; if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { -- GitLab From 101c1d2d46a1170caed0d41f1763ad6c167bb0c0 Mon Sep 17 00:00:00 2001 From: Bikash Hazarika Date: Tue, 27 Feb 2024 22:11:20 +0530 Subject: [PATCH 1374/1418] scsi: qla2xxx: Update manufacturer detail commit 688fa069fda6fce24d243cddfe0c7024428acb74 upstream. Update manufacturer detail from "Marvell Semiconductor, Inc." to "Marvell". Cc: stable@vger.kernel.org Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1713588f671f3..31c451daeeb82 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -83,7 +83,7 @@ typedef union { #include "qla_nvme.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" -#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc." +#define QLA2XXX_MANUFACTURER "Marvell" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, -- GitLab From 1bcbd100abb8a01ea79d3e5568259ae621017788 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:21 +0530 Subject: [PATCH 1375/1418] scsi: qla2xxx: NVME|FCP prefer flag not being honored commit 69aecdd410106dc3a8f543a4f7ec6379b995b8d0 upstream. Changing of [FCP|NVME] prefer flag in flash has no effect on driver. For device that supports both FCP + NVMe over the same connection, driver continues to connect to this device using the previous successful login mode. On completion of flash update, adapter will be reset. Driver will reset the prefer flag based on setting from flash. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f0a11d42e1fd4..c64e44964d840 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7554,6 +7554,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) struct scsi_qla_host *vp, *tvp; struct req_que *req = ha->req_q_map[0]; unsigned long flags; + fc_port_t *fcport; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -7622,6 +7623,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); return status; } + + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + if (!qla2x00_restart_isp(vha)) { clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); @@ -7692,6 +7702,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) atomic_inc(&vp->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vp->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + qla2x00_vp_abort_isp(vp); spin_lock_irqsave(&ha->vport_slock, flags); -- GitLab From 09c0ac18cac206ed1218b1fe6c1a0918e5ea9211 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:22 +0530 Subject: [PATCH 1376/1418] scsi: qla2xxx: Fix command flush on cable pull commit a27d4d0e7de305def8a5098a614053be208d1aa1 upstream. System crash due to command failed to flush back to SCSI layer. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 27 PID: 793455 Comm: kworker/u130:6 Kdump: loaded Tainted: G OE --------- - - 4.18.0-372.9.1.el8.x86_64 #1 Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 Workqueue: nvme-wq nvme_fc_connect_ctrl_work [nvme_fc] RIP: 0010:__wake_up_common+0x4c/0x190 Code: 24 10 4d 85 c9 74 0a 41 f6 01 04 0f 85 9d 00 00 00 48 8b 43 08 48 83 c3 08 4c 8d 48 e8 49 8d 41 18 48 39 c3 0f 84 f0 00 00 00 <49> 8b 41 18 89 54 24 08 31 ed 4c 8d 70 e8 45 8b 29 41 f6 c5 04 75 RSP: 0018:ffff95f3e0cb7cd0 EFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff8b08d3b26328 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8b08d3b26320 RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffffffffe8 R10: 0000000000000000 R11: ffff95f3e0cb7a60 R12: ffff95f3e0cb7d20 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8b2fdf6c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000002f1e410002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __wake_up_common_lock+0x7c/0xc0 qla_nvme_ls_req+0x355/0x4c0 [qla2xxx] qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae1407ca000 from port 21:32:00:02:ac:07:ee:b8 loop_id 0x02 s_id 01:02:00 logout 1 keep 0 els_logo 0 ? __nvme_fc_send_ls_req+0x260/0x380 [nvme_fc] qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:00:02:ac:07:ee:b8 state transitioned from ONLINE to LOST - portid=010200. ? nvme_fc_send_ls_req.constprop.42+0x1a/0x45 [nvme_fc] qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320002ac07eeb8. rport ffff8ae598122000 roles 1 ? nvme_fc_connect_ctrl_work.cold.63+0x1e3/0xa7d [nvme_fc] qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae14801e000 from port 21:32:01:02:ad:f7:ee:b8 loop_id 0x04 s_id 01:02:01 logout 1 keep 0 els_logo 0 ? __switch_to+0x10c/0x450 ? process_one_work+0x1a7/0x360 qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:01:02:ad:f7:ee:b8 state transitioned from ONLINE to LOST - portid=010201. ? worker_thread+0x1ce/0x390 ? create_worker+0x1a0/0x1a0 qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320102adf7eeb8. rport ffff8ae3b2312800 roles 70 ? kthread+0x10a/0x120 qla2xxx [0000:12:00.1]-2112:3: qla_nvme_unregister_remote_port: unregister remoteport on ffff8ae14801e000 21320102adf7eeb8 ? set_kthread_struct+0x40/0x40 qla2xxx [0000:12:00.1]-2110:3: remoteport_delete of ffff8ae14801e000 21320102adf7eeb8 completed. ? ret_from_fork+0x1f/0x40 qla2xxx [0000:12:00.1]-f086:3: qlt_free_session_done: waiting for sess ffff8ae14801e000 logout The system was under memory stress where driver was not able to allocate an SRB to carry out error recovery of cable pull. The failure to flush causes upper layer to start modifying scsi_cmnd. When the system frees up some memory, the subsequent cable pull trigger another command flush. At this point the driver access a null pointer when attempting to DMA unmap the SGL. Add a check to make sure commands are flush back on session tear down to prevent the null pointer access. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 5a5beb41786ed..043cfa10c7167 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1079,6 +1079,16 @@ void qlt_free_session_done(struct work_struct *work) "%s: sess %p logout completed\n", __func__, sess); } + /* check for any straggling io left behind */ + if (!(sess->flags & FCF_FCP2_DEVICE) && + qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x3027, + "IO not return. Resetting.\n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } + if (sess->logo_ack_needed) { sess->logo_ack_needed = 0; qla24xx_async_notify_ack(vha, sess, -- GitLab From 282877633b25d67021a34169c5b5519b1d4ef65e Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Tue, 27 Feb 2024 22:11:24 +0530 Subject: [PATCH 1377/1418] scsi: qla2xxx: Fix double free of fcport commit 82f522ae0d97119a43da53e0f729275691b9c525 upstream. The server was crashing after LOGO because fcport was getting freed twice. -----------[ cut here ]----------- kernel BUG at mm/slub.c:371! invalid opcode: 0000 1 SMP PTI CPU: 35 PID: 4610 Comm: bash Kdump: loaded Tainted: G OE --------- - - 4.18.0-425.3.1.el8.x86_64 #1 Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 RIP: 0010:set_freepointer.part.57+0x0/0x10 RSP: 0018:ffffb07107027d90 EFLAGS: 00010246 RAX: ffff9cb7e3150000 RBX: ffff9cb7e332b9c0 RCX: ffff9cb7e3150400 RDX: 0000000000001f37 RSI: 0000000000000000 RDI: ffff9cb7c0005500 RBP: fffff693448c5400 R08: 0000000080000000 R09: 0000000000000009 R10: 0000000000000000 R11: 0000000000132af0 R12: ffff9cb7c0005500 R13: ffff9cb7e3150000 R14: ffffffffc06990e0 R15: ffff9cb7ea85ea58 FS: 00007ff6b79c2740(0000) GS:ffff9cb8f7ec0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055b426b7d700 CR3: 0000000169c18002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: kfree+0x238/0x250 qla2x00_els_dcmd_sp_free+0x20/0x230 [qla2xxx] ? qla24xx_els_dcmd_iocb+0x607/0x690 [qla2xxx] qla2x00_issue_logo+0x28c/0x2a0 [qla2xxx] ? qla2x00_issue_logo+0x28c/0x2a0 [qla2xxx] ? kernfs_fop_write+0x11e/0x1a0 Remove one of the free calls and add check for valid fcport. Also use function qla2x00_free_fcport() instead of kfree(). Cc: stable@vger.kernel.org Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-9-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_iocb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 97b98ca09acb1..7bccd525ee19b 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2638,7 +2638,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) { struct srb_iocb *elsio = &sp->u.iocb_cmd; - kfree(sp->fcport); + if (sp->fcport) + qla2x00_free_fcport(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, @@ -2751,6 +2752,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2785,7 +2787,6 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b.area, fcport->d_id.b.al_pa); wait_for_completion(&elsio->u.els_logo.comp); - qla2x00_free_fcport(fcport); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); -- GitLab From 65f195232b371cf05f515db0a6916825645924bc Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Tue, 27 Feb 2024 22:11:25 +0530 Subject: [PATCH 1378/1418] scsi: qla2xxx: Change debug message during driver unload commit b5a30840727a3e41d12a336d19f6c0716b299161 upstream. Upon driver unload, purge_mbox flag is set and the heartbeat monitor thread detects this flag and does not send the mailbox command down to FW with a debug message "Error detected: purge[1] eeh[0] cmd=0x0, Exiting". This being not a real error, change the debug message. Cc: stable@vger.kernel.org Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-10-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_mbx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index f794f4363a38c..1fd9485985f2e 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || ha->flags.eeh_busy) { ql_log(ql_log_warn, vha, 0xd035, - "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; -- GitLab From f30b3ee9a4861bcaea2642b1ec38d06e39938ce1 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:26 +0530 Subject: [PATCH 1379/1418] scsi: qla2xxx: Delay I/O Abort on PCI error commit 591c1fdf2016d118b8fbde427b796fac13f3f070 upstream. Currently when PCI error is detected, I/O is aborted manually through the ABORT IOCB mechanism which is not guaranteed to succeed. Instead, wait for the OS or system to notify driver to wind down I/O through the pci_error_handlers api. Set eeh_busy flag to pause all traffic and wait for I/O to drain. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-11-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 07fbaa452d8a1..0d414c1aa84e7 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) return; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(fcport->vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } } @@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(vha); qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET); return; -- GitLab From 00f511d71629eb15715dce62b72117341891eff4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 20 Jul 2023 14:47:27 -0500 Subject: [PATCH 1380/1418] x86/cpu: Enable STIBP on AMD if Automatic IBRS is enabled commit fd470a8beed88440b160d690344fbae05a0b9b1b upstream. Unlike Intel's Enhanced IBRS feature, AMD's Automatic IBRS does not provide protection to processes running at CPL3/user mode, see section "Extended Feature Enable Register (EFER)" in the APM v2 at https://bugzilla.kernel.org/attachment.cgi?id=304652 Explicitly enable STIBP to protect against cross-thread CPL3 branch target injections on systems with Automatic IBRS enabled. Also update the relevant documentation. Fixes: e7862eda309e ("x86/cpu: Support AMD Automatic IBRS") Reported-by: Tom Lendacky Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230720194727.67022-1-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 11 +++++++---- arch/x86/kernel/cpu/bugs.c | 15 +++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 4d186f599d90f..32a8893e56177 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -484,11 +484,14 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks, including cross-thread branch target injections - on SMT systems (STIBP). In other words, eIBRS enables STIBP too. + Spectre v2 variant attacks. - Legacy IBRS systems clear the IBRS bit on exit to userspace and - therefore explicitly enable STIBP for that + On Intel's enhanced IBRS systems, this includes cross-thread branch target + injections on SMT systems (STIBP). In other words, Intel eIBRS enables + STIBP, too. + + AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear + the IBRS bit on exit to userspace, therefore both explicitly enable STIBP. The retpoline mitigation is turned on by default on vulnerable CPUs. It can be forced on or off by the administrator diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 750fb4fc2ac6a..e3fec47a800bf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1354,19 +1354,21 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP * is not required. * - * Enhanced IBRS also protects against cross-thread branch target + * Intel's Enhanced IBRS also protects against cross-thread branch target * injection in user-mode as the IBRS bit remains always set which * implicitly enables cross-thread protections. However, in legacy IBRS * mode, the IBRS bit is set only on kernel entry and cleared on return - * to userspace. This disables the implicit cross-thread protection, - * so allow for STIBP to be selected in that case. + * to userspace. AMD Automatic IBRS also does not protect userspace. + * These modes therefore disable the implicit cross-thread protection, + * so allow for STIBP to be selected in those cases. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) return; /* @@ -2666,7 +2668,8 @@ static ssize_t rfds_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS)) return ""; switch (spectre_v2_user_stibp) { -- GitLab From f2b85a4cc763841843de693bbd7308fe9a2c4c89 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 28 Feb 2024 23:44:00 +0100 Subject: [PATCH 1381/1418] tls: fix use-after-free on failed backlog decryption commit 13114dc5543069f7b97991e3b79937b6da05f5b0 upstream. When the decrypt request goes to the backlog and crypto_aead_decrypt returns -EBUSY, tls_do_decryption will wait until all async decryptions have completed. If one of them fails, tls_do_decryption will return -EBADMSG and tls_decrypt_sg jumps to the error path, releasing all the pages. But the pages have been passed to the async callback, and have already been released by tls_decrypt_done. The only true async case is when crypto_aead_decrypt returns -EINPROGRESS. With -EBUSY, we already waited so we can tell tls_sw_recvmsg that the data is available for immediate copy, but we need to notify tls_decrypt_sg (via the new ->async_done flag) that the memory has already been released. Fixes: 859054147318 ("net: tls: handle backlogging of crypto requests") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/4755dd8d9bebdefaa19ce1439b833d6199d4364c.1709132643.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e723584fc644b..bdb5153f3788a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -51,6 +51,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ -279,18 +280,19 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS) + return 0; + if (ret == -EBUSY) { ret = tls_decrypt_async_wait(ctx); + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; ret = ret ?: -EINPROGRESS; } - if (ret == -EINPROGRESS) { - if (darg->async) - return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); - } else if (darg->async) { - atomic_dec(&ctx->decrypt_pending); - } + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -1681,8 +1683,11 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; @@ -1694,6 +1699,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; -- GitLab From d8cd93e0304709beed2a78774640d25d38a47d21 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 4 Mar 2024 14:11:19 +0500 Subject: [PATCH 1382/1418] scsi: lpfc: Correct size for cmdwqe/rspwqe for memset() commit 16cc2ba71b9f6440805aef7f92ba0f031f79b765 upstream. The cmdwqe and rspwqe are of type lpfc_wqe128. They should be memset() with the same type. Fixes: 61910d6a5243 ("scsi: lpfc: SLI path split: Refactor CT paths") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240304091119.847060-1-usama.anjum@collabora.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_bsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index b54fafb486e06..2373dad016033 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) } cmdwqe = &cmdiocbq->wqe; - memset(cmdwqe, 0, sizeof(union lpfc_wqe)); + memset(cmdwqe, 0, sizeof(*cmdwqe)); if (phba->sli_rev < LPFC_SLI_REV4) { rspwqe = &rspiocbq->wqe; - memset(rspwqe, 0, sizeof(union lpfc_wqe)); + memset(rspwqe, 0, sizeof(*rspwqe)); } INIT_LIST_HEAD(&head); -- GitLab From 8dbc1762202aa5db49d23346316fe0f140063946 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 4 Mar 2024 14:06:48 +0500 Subject: [PATCH 1383/1418] scsi: lpfc: Correct size for wqe for memset() commit 28d41991182c210ec1654f8af2e140ef4cc73f20 upstream. The wqe is of type lpfc_wqe128. It should be memset with the same type. Fixes: 6c621a2229b0 ("scsi: lpfc: Separate NVMET RQ buffer posting from IO resources SGL/iocbq/context") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240304090649.833953-1-usama.anjum@collabora.com Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index f7cfac0da9b6e..1c64da3b2e9f0 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) wqe = &nvmewqe->wqe; /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); + memset(wqe, 0, sizeof(*wqe)); ctx_buf->iocbq->cmd_dmabuf = NULL; spin_lock(&phba->sli4_hba.sgl_list_lock); -- GitLab From d511040d816941d2b227837e01d9b9b1cc416c03 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Thu, 7 Mar 2024 14:14:12 +0000 Subject: [PATCH 1384/1418] scsi: libsas: Add a helper sas_get_sas_addr_and_dev_type() commit a57345279fd311ba679b8083feb0eec5272c7729 upstream. Add a helper to get attached_sas_addr and device type from disc_resp. Suggested-by: John Garry Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240307141413.48049-2-yangxingui@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libsas/sas_expander.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 63a23251fb1d8..1b8005c07dff4 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1651,6 +1651,16 @@ out_err: /* ---------- Domain revalidation ---------- */ +static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp, + u8 *sas_addr, + enum sas_device_type *type) +{ + memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE); + *type = to_dev_type(&disc_resp->disc); + if (*type == SAS_PHY_UNUSED) + memset(sas_addr, 0, SAS_ADDR_SIZE); +} + static int sas_get_phy_discover(struct domain_device *dev, int phy_id, struct smp_disc_resp *disc_resp) { @@ -1704,13 +1714,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, return -ENOMEM; res = sas_get_phy_discover(dev, phy_id, disc_resp); - if (res == 0) { - memcpy(sas_addr, disc_resp->disc.attached_sas_addr, - SAS_ADDR_SIZE); - *type = to_dev_type(&disc_resp->disc); - if (*type == 0) - memset(sas_addr, 0, SAS_ADDR_SIZE); - } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type); kfree(disc_resp); return res; } -- GitLab From 98cfafaf2f414e03302c09e4989f3c35744db5ce Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Thu, 7 Mar 2024 14:14:13 +0000 Subject: [PATCH 1385/1418] scsi: libsas: Fix disk not being scanned in after being removed commit 8e68a458bcf5b5cb9c3624598bae28f08251601f upstream. As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info"), do discovery will send a new SMP_DISCOVER and update phy->phy_change_count. We found that if the disk is reconnected and phy change_count changes at this time, the disk scanning process will not be triggered. Therefore, call sas_set_ex_phy() to update the PHY info with the results of the last query. And because the previous phy info will be used when calling sas_unregister_devs_sas_addr(), sas_unregister_devs_sas_addr() should be called before sas_set_ex_phy(). Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info") Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240307141413.48049-3-yangxingui@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libsas/sas_expander.c | 32 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 1b8005c07dff4..4b5ceba68e46e 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1977,6 +1977,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; + struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; char msg[80] = ""; int res; @@ -1988,33 +1989,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(dev->sas_addr), phy_id, msg); memset(sas_addr, 0, SAS_ADDR_SIZE); - res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_FUNC_ACC: break; case -ECOMM: break; default: - return res; + goto out_free_resp; } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type); + if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); /* - * Even though the PHY is empty, for convenience we discover - * the PHY to update the PHY info, like negotiated linkrate. + * Even though the PHY is empty, for convenience we update + * the PHY info, like negotiated linkrate. */ - sas_ex_phy_discover(dev, phy_id); - return res; + if (res == 0) + sas_set_ex_phy(dev, phy_id, disc_resp); + goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); @@ -2026,7 +2035,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, action = ", needs recovery"; pr_debug("ex %016llx phy%02d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - return res; + goto out_free_resp; } /* we always have to delete the old device when we went here */ @@ -2035,7 +2044,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); - return sas_discover_new(dev, phy_id); + res = sas_discover_new(dev, phy_id); +out_free_resp: + kfree(disc_resp); + return res; } /** -- GitLab From df84d9f7796feeb8dea0a4721e269da371982b76 Mon Sep 17 00:00:00 2001 From: Kevin Loughlin Date: Wed, 13 Mar 2024 12:15:46 +0000 Subject: [PATCH 1386/1418] x86/sev: Skip ROM range scans and validation for SEV-SNP guests commit 0f4a1e80989aca185d955fcd791d7750082044a2 upstream. SEV-SNP requires encrypted memory to be validated before access. Because the ROM memory range is not part of the e820 table, it is not pre-validated by the BIOS. Therefore, if a SEV-SNP guest kernel wishes to access this range, the guest must first validate the range. The current SEV-SNP code does indeed scan the ROM range during early boot and thus attempts to validate the ROM range in probe_roms(). However, this behavior is neither sufficient nor necessary for the following reasons: * With regards to sufficiency, if EFI_CONFIG_TABLES are not enabled and CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK is set, the kernel will attempt to access the memory at SMBIOS_ENTRY_POINT_SCAN_START (which falls in the ROM range) prior to validation. For example, Project Oak Stage 0 provides a minimal guest firmware that currently meets these configuration conditions, meaning guests booting atop Oak Stage 0 firmware encounter a problematic call chain during dmi_setup() -> dmi_scan_machine() that results in a crash during boot if SEV-SNP is enabled. * With regards to necessity, SEV-SNP guests generally read garbage (which changes across boots) from the ROM range, meaning these scans are unnecessary. The guest reads garbage because the legacy ROM range is unencrypted data but is accessed via an encrypted PMD during early boot (where the PMD is marked as encrypted due to potentially mapping actually-encrypted data in other PMD-contained ranges). In one exceptional case, EISA probing treats the ROM range as unencrypted data, which is inconsistent with other probing. Continuing to allow SEV-SNP guests to use garbage and to inconsistently classify ROM range encryption status can trigger undesirable behavior. For instance, if garbage bytes appear to be a valid signature, memory may be unnecessarily reserved for the ROM range. Future code or other use cases may result in more problematic (arbitrary) behavior that should be avoided. While one solution would be to overhaul the early PMD mapping to always treat the ROM region of the PMD as unencrypted, SEV-SNP guests do not currently rely on data from the ROM region during early boot (and even if they did, they would be mostly relying on garbage data anyways). As a simpler solution, skip the ROM range scans (and the otherwise- necessary range validation) during SEV-SNP guest early boot. The potential SEV-SNP guest crash due to lack of ROM range validation is thus avoided by simply not accessing the ROM range. In most cases, skip the scans by overriding problematic x86_init functions during sme_early_init() to SNP-safe variants, which can be likened to x86_init overrides done for other platforms (ex: Xen); such overrides also avoid the spread of cc_platform_has() checks throughout the tree. In the exceptional EISA case, still use cc_platform_has() for the simplest change, given (1) checks for guest type (ex: Xen domain status) are already performed here, and (2) these checks occur in a subsys initcall instead of an x86_init function. [ bp: Massage commit message, remove "we"s. ] Fixes: 9704c07bf9f7 ("x86/kernel: Validate ROM memory before accessing when SEV-SNP is active") Signed-off-by: Kevin Loughlin Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20240313121546.2964854-1-kevinloughlin@google.com Signed-off-by: Kevin Loughlin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/sev.h | 4 ++-- arch/x86/include/asm/x86_init.h | 3 ++- arch/x86/kernel/eisa.c | 3 ++- arch/x86/kernel/probe_roms.c | 10 ---------- arch/x86/kernel/setup.c | 3 +-- arch/x86/kernel/sev.c | 27 ++++++++++++--------------- arch/x86/kernel/x86_init.c | 2 ++ arch/x86/mm/mem_encrypt_amd.c | 18 ++++++++++++++++++ 8 files changed, 39 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index cf98fc28601fb..c57dd21155bd7 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -196,12 +196,12 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd unsigned long npages); void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); @@ -219,12 +219,12 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 034e62838b284..c3e910b1d5a25 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b04490..53935b4d62e30 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include #include #include #include @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dce..cc2c34ba7228a 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 804a252382da7..d1ffac9ad611d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -1032,7 +1031,7 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index f93ff4794e38f..e35fcc8d4bae4 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -768,21 +769,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static int vmgexit_psc(struct snp_psc_desc *desc) { int cur_entry, end_entry, ret = 0; @@ -2152,6 +2138,17 @@ void __init __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 41e5b4cb898c3..a4a921b9e6646 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include #include #include #include @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 3e93af083e037..d4957eefef267 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -513,6 +513,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. + */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) -- GitLab From ab062fa3dc69aea88fe62162c5881ba14b50ecc5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 12 Mar 2024 11:48:23 -0400 Subject: [PATCH 1387/1418] USB: core: Fix deadlock in usb_deauthorize_interface() commit 80ba43e9f799cbdd83842fc27db667289b3150f5 upstream. Among the attribute file callback routines in drivers/usb/core/sysfs.c, the interface_authorized_store() function is the only one which acquires a device lock on an ancestor device: It calls usb_deauthorize_interface(), which locks the interface's parent USB device. The will lead to deadlock if another process already owns that lock and tries to remove the interface, whether through a configuration change or because the device has been disconnected. As part of the removal procedure, device_del() waits for all ongoing sysfs attribute callbacks to complete. But usb_deauthorize_interface() can't complete until the device lock has been released, and the lock won't be released until the removal has finished. The mechanism provided by sysfs to prevent this kind of deadlock is to use the sysfs_break_active_protection() function, which tells sysfs not to wait for the attribute callback. Reported-and-tested by: Yue Sun Reported by: xingwei lee Signed-off-by: Alan Stern Link: https://lore.kernel.org/linux-usb/CAEkJfYO6jRVC8Tfrd_R=cjO0hguhrV31fDPrLrNOOHocDkPoAA@mail.gmail.com/#r Fixes: 310d2b4124c0 ("usb: interface authorization: SysFS part of USB interface authorization") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1c37eea1-9f56-4534-b9d8-b443438dc869@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/sysfs.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index ccf6cd9722693..5f1e07341f363 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1170,14 +1170,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (strtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } -- GitLab From 7b970a145c90260dee678065a9da97c90863a0ef Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Thu, 28 Mar 2024 11:59:13 +0100 Subject: [PATCH 1388/1418] tools/resolve_btfids: fix build with musl libc commit 62248b22d01e96a4d669cde0d7005bd51ebf9e76 upstream. Include the header that defines u32. This fixes build of 6.6.23 and 6.1.83 kernels for Alpine Linux, which uses musl libc. I assume that GNU libc indirecly pulls in linux/types.h. Fixes: 9707ac4fe2f5 ("tools/resolve_btfids: Refactor set sorting with types from btf_ids.h") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218647 Cc: stable@vger.kernel.org Signed-off-by: Natanael Copa Tested-by: Greg Thelen Link: https://lore.kernel.org/r/20240328110103.28734-1-ncopa@alpinelinux.org Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/btf_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 72535f00572f6..72ea363d434db 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; -- GitLab From 347385861c50adc8d4801d4b899eded38a2f04cd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 15:19:55 +0200 Subject: [PATCH 1389/1418] Linux 6.1.84 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240401152530.237785232@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Kelsey Steele Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Mark Brown Tested-by: Mateusz Jończyk Tested-by: Jon Hunter Tested-by: Salvatore Bonaccorso Tested-by: Yann Sionneau Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 38657b3dda2cd..0e33150db2bfc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 83 +SUBLEVEL = 84 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 3de5fca05754d429a04c762e7ad8e8ec48c076a9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 May 2024 06:07:03 +0000 Subject: [PATCH 1390/1418] ANDROID: add pm_runtime_get_if_active to db845 symbol list In the 6.1.79 merge, the msm.ko module now needs to use the pm_runtime_get_if_active symbol for the db845 target, so properly add it to the symbol list so that the build will work properly. Fixes: ff43d92a5625 ("Merge 6.1.79 into android14-6.1-lts") Change-Id: Ic6fcfbe271d1df06cfde11d7a285752073bd8f36 Signed-off-by: Greg Kroah-Hartman --- android/abi_gki_aarch64_db845c | 1 + 1 file changed, 1 insertion(+) diff --git a/android/abi_gki_aarch64_db845c b/android/abi_gki_aarch64_db845c index 9c68b5d9188bc..14c68b6f14013 100644 --- a/android/abi_gki_aarch64_db845c +++ b/android/abi_gki_aarch64_db845c @@ -565,6 +565,7 @@ pm_runtime_forbid pm_runtime_force_resume pm_runtime_force_suspend + pm_runtime_get_if_active __pm_runtime_idle __pm_runtime_resume pm_runtime_set_autosuspend_delay -- GitLab From 5b066041f2b4516f42a42d8fc23e8b60c09c07bb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 May 2024 06:03:30 +0000 Subject: [PATCH 1391/1418] ANDROID: add dma_max_mapping_size to virtual_device symbol list. In commit dc3890441c9e ("drm/virtio: Set segment size for virtio_gpu device"), dma_max_mapping_size() is added to the virtio-gpu driver, but it was not exported, so export it now to allow the driver to build properly. Fixes: dc3890441c9e ("drm/virtio: Set segment size for virtio_gpu device") Change-Id: I80fc4e9ee3ccc809f89d223736564f2bb5123250 Signed-off-by: Greg Kroah-Hartman --- android/abi_gki_aarch64.stg | 10 ++++++++++ android/abi_gki_aarch64_virtual_device | 1 + 2 files changed, 11 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 5aa89d5c6901a..11d5eeb2c32b0 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -360622,6 +360622,15 @@ elf_symbol { type_id: 0x9d27e158 full_name: "dma_map_sgtable" } +elf_symbol { + id: 0xfd190cc3 + name: "dma_max_mapping_size" + is_defined: true + symbol_type: FUNCTION + crc: 0x4e003fd4 + type_id: 0x97a9f709 + full_name: "dma_max_mapping_size" +} elf_symbol { id: 0xd140139f name: "dma_mmap_attrs" @@ -408941,6 +408950,7 @@ interface { symbol_id: 0x18b0baef symbol_id: 0x13f645c1 symbol_id: 0xb989e3a2 + symbol_id: 0xfd190cc3 symbol_id: 0xd140139f symbol_id: 0x009463a5 symbol_id: 0x843aec6c diff --git a/android/abi_gki_aarch64_virtual_device b/android/abi_gki_aarch64_virtual_device index eacbf9364c9d5..fe2b3c0d08f5f 100644 --- a/android/abi_gki_aarch64_virtual_device +++ b/android/abi_gki_aarch64_virtual_device @@ -87,6 +87,7 @@ dma_fence_signal_locked dma_free_attrs dmam_alloc_attrs + dma_max_mapping_size dma_set_coherent_mask dma_set_mask dma_sync_sg_for_device -- GitLab From 706f6a46d88957b49345bc27219364c8ef1d1650 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 07:43:31 +0000 Subject: [PATCH 1392/1418] Revert "usb: roles: don't get/set_role() when usb_role_switch is unregistered" This reverts commit 2f414a56b369129cfdac6ecaf9899f9807a264aa which is commit b787a3e781759026a6212736ef8e52cf83d1821a upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Ia72bbe97772c8e65c2dfe2835b00a49414702dbb Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index a327f8bc57043..d90d164238099 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -21,7 +21,6 @@ struct usb_role_switch { struct mutex lock; /* device lock*/ struct module *module; /* the module this device depends on */ enum usb_role role; - bool registered; /* From descriptor */ struct device *usb2_port; @@ -48,9 +47,6 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; - if (!sw->registered) - return -EOPNOTSUPP; - mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -76,7 +72,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw) || !sw->registered) + if (IS_ERR_OR_NULL(sw)) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -360,8 +356,6 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } - sw->registered = true; - /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -376,10 +370,8 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) { - sw->registered = false; + if (!IS_ERR_OR_NULL(sw)) device_unregister(&sw->dev); - } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); -- GitLab From 601ccd855a4079a978b52650356cac16a44de255 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 07:43:57 +0000 Subject: [PATCH 1393/1418] Revert "usb: roles: fix NULL pointer issue when put module's reference" This reverts commit 0158216805ca7e498d07de38840d2732166ae5fa which is commit 1c9be13846c0b2abc2480602f8ef421360e1ad9e upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I092d81a7ad00ebe0d0c16509b5789ea0b7de29e4 Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index d90d164238099..32e6d19f7011a 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -19,7 +19,6 @@ static struct class *role_class; struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ - struct module *module; /* the module this device depends on */ enum usb_role role; /* From descriptor */ @@ -134,7 +133,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->module)); + WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); return sw; } @@ -156,7 +155,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->module)); + WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); return sw; } @@ -171,7 +170,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->module); + module_put(sw->dev.parent->driver->owner); put_device(&sw->dev); } } @@ -188,18 +187,15 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; - struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(role_class, fwnode); - if (dev) { - sw = to_role_switch(dev); - WARN_ON(!try_module_get(sw->module)); - } + if (dev) + WARN_ON(!try_module_get(dev->parent->driver->owner)); - return sw; + return dev ? to_role_switch(dev) : NULL; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -341,7 +337,6 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; - sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = role_class; -- GitLab From 9006fc45ee72f9b45acc6813c80449353d0019ba Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 07:45:47 +0000 Subject: [PATCH 1394/1418] Revert "arp: Prevent overflow in arp_req_get()." This reverts commit f119f2325ba70cbfdec701000dcad4d88805d5b0 which is commit a7d6027790acea24446ddd6632d394096c0f4667 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I97be23b4073709843571e5b5d05903fda4121870 Signed-off-by: Greg Kroah-Hartman --- net/ipv4/arp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ccff96820a703..9456f5bb35e5d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,8 +1125,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, - min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min))); + memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; -- GitLab From 290b1bfd8597c710bd83b6bc59e2cb5d21a06077 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 07:46:14 +0000 Subject: [PATCH 1395/1418] Revert "net: dev: Convert sa_data to flexible array in struct sockaddr" This reverts commit fd84a5fae03c0a6ca66d6d7eb9f473b2c7957c21 which is commit b5f0de6df6dce8d641ef58ef7012f3304dffb9a1 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Iecd4acaa6f3d416b1fe7b928d3b4f069bf61861e Signed-off-by: Greg Kroah-Hartman --- include/linux/socket.h | 5 +---- net/core/dev.c | 2 +- net/core/dev_ioctl.c | 2 +- net/packet/af_packet.c | 10 +++++----- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index d79efd0268809..b3c58042bd254 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,10 +33,7 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - union { - char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ - DECLARE_FLEX_ARRAY(char, sa_data); - }; + char sa_data[14]; /* 14 bytes of protocol address */ }; struct linger { diff --git a/net/core/dev.c b/net/core/dev.c index 3102191d0877d..06600fa3f94c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8868,7 +8868,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data_min); + size_t size = sizeof(sa->sa_data); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5cdbfbf9a7dcf..7674bb9f3076c 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data_min), + min(sizeof(ifr->ifr_hwaddr.sa_data), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c3117350f5fbb..51882f07ef70c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3284,7 +3284,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data_min) + 1]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3295,8 +3295,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); - name[sizeof(uaddr->sa_data_min)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3566,11 +3566,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); return sizeof(*uaddr); -- GitLab From d8fcb4c06b7d72d655d0d96dffe9b8f2bbe6a784 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 07:47:00 +0000 Subject: [PATCH 1396/1418] Revert "mptcp: fix lockless access in subflow ULP diag" This reverts commit 71787c665d09a970b9280c285181d3a2d1bf3bb0 which is commit b8adb69a7d29c2d33eb327bca66476fb6066516b upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I18ef204d3c165adca98146fa07d2fd25671d7b42 Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/mptcp/diag.c | 6 +----- net/tls/tls_main.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index e1ba0fb66ff9b..c3d56b337f358 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2289,7 +2289,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(struct sock *sk, struct sk_buff *skb); + int (*get_info)(const struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index e57c5f47f0351..a536586742f28 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,19 +13,17 @@ #include #include "protocol.h" -static int subflow_get_info(struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; - bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; - slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -71,13 +69,11 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); - unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); - unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 8267b37dfa8ed..f2e7302a4d96b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1092,7 +1092,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(struct sock *sk, struct sk_buff *skb) +static int tls_get_info(const struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; -- GitLab From 1c7c9982329d7e7ca5e0f554160e477de6114f37 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 May 2024 06:29:03 +0000 Subject: [PATCH 1397/1418] ANDROID: add blk_mq_freeze_queue and blk_mq_unfreeze_queue to virtual device symbol list In commit 2a52590ac523 ("virtio-blk: Ensure no requests in virtqueues before deleting vqs.") the virtio_blk driver adds calls to blk_mq_freeze_queue and blk_mq_unfreeze_queue, so add them to the virtual device symbol list so that that target will build properly. Fixes: 2a52590ac523 ("virtio-blk: Ensure no requests in virtqueues before deleting vqs.") Change-Id: Iaf7ef825414a5bc3db36cd9479acb0c1a7435e11 Signed-off-by: Greg Kroah-Hartman --- android/abi_gki_aarch64_virtual_device | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/abi_gki_aarch64_virtual_device b/android/abi_gki_aarch64_virtual_device index fe2b3c0d08f5f..22eb80d6bfc83 100644 --- a/android/abi_gki_aarch64_virtual_device +++ b/android/abi_gki_aarch64_virtual_device @@ -18,6 +18,8 @@ bpf_trace_run4 bpf_trace_run5 bpf_trace_run6 + blk_mq_freeze_queue + blk_mq_unfreeze_queue bt_err bt_info bt_warn -- GitLab From e075d7f47a4ab1ba144c7014c3395e6fc80230d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:43:41 +0000 Subject: [PATCH 1398/1418] Revert "Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT" This reverts commit 92b8a3273f3812ba441d2a842d602ff7d33362b3 which is commit 7dcd3e014aa7faeeaf4047190b22d8a19a0db696 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I652f40ab304425a2c5418431515df4e1afa3193e Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8bfef7f81b417..43abdaf92a0ed 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,7 +7,6 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1845,17 +1844,7 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - - /* Set BDA quirk bit for reading BDA value from fwnode property - * only if that property exist in DT. - */ - if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); - } else { - bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); - } - + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); -- GitLab From 360b41bc657818769c368712861e1cc442a09220 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:44:03 +0000 Subject: [PATCH 1399/1418] Revert "Bluetooth: qca: add support for WCN7850" This reverts commit 67ffc334b92a96e65b627b6b3349c25946ff69f6 which is commit 67ffc334b92a96e65b627b6b3349c25946ff69f6 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I62f7e652e6cd20291b5897431865e8a3ebbacb4d Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 10 ---------- drivers/bluetooth/btqca.h | 1 - drivers/bluetooth/hci_qca.c | 31 +------------------------------ 3 files changed, 1 insertion(+), 41 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 0211f704a358b..8331090af86ea 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -631,10 +631,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/hpbtfw%02x.tlv", rom_ver); break; - case QCA_WCN7850: - snprintf(config.fwname, sizeof(config.fwname), - "qca/hmtbtfw%02x.tlv", rom_ver); - break; default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); @@ -683,10 +679,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/hpnv%02x.bin", rom_ver); break; - case QCA_WCN7850: - snprintf(config.fwname, sizeof(config.fwname), - "qca/hmtnv%02x.bin", rom_ver); - break; default: snprintf(config.fwname, sizeof(config.fwname), @@ -705,7 +697,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, case QCA_QCA6390: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: err = qca_disable_soc_logging(hdev); if (err < 0) return err; @@ -740,7 +731,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, case QCA_WCN3991: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 03bff5c0059de..fe51c632d7720 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -149,7 +149,6 @@ enum qca_btsoc_type { QCA_QCA6390, QCA_WCN6750, QCA_WCN6855, - QCA_WCN7850, }; #if IS_ENABLED(CONFIG_BT_QCA) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 43abdaf92a0ed..e6ead996948a8 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1330,7 +1330,6 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: usleep_range(1000, 10000); break; @@ -1416,7 +1415,6 @@ static int qca_check_speeds(struct hci_uart *hu) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1458,7 +1456,6 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: hci_uart_set_flow_control(hu, true); break; @@ -1492,7 +1489,6 @@ error: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: hci_uart_set_flow_control(hu, false); break; @@ -1760,7 +1756,6 @@ static int qca_power_on(struct hci_dev *hdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: ret = qca_regulator_init(hu); break; @@ -1818,10 +1813,6 @@ static int qca_setup(struct hci_uart *hu) soc_name = "wcn6855"; break; - case QCA_WCN7850: - soc_name = "wcn7850"; - break; - default: soc_name = "ROME/QCA6390"; } @@ -1843,7 +1834,6 @@ retry: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -1873,7 +1863,6 @@ retry: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: break; default: @@ -2027,20 +2016,6 @@ static const struct qca_device_data qca_soc_data_wcn6855 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { - .soc_type = QCA_WCN7850, - .vregs = (struct qca_vreg []) { - { "vddio", 5000 }, - { "vddaon", 26000 }, - { "vdddig", 126000 }, - { "vddrfa0p8", 102000 }, - { "vddrfa1p2", 257000 }, - { "vddrfa1p9", 302000 }, - }, - .num_vregs = 6, - .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, -}; - static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -2224,7 +2199,6 @@ static int qca_serdev_probe(struct serdev_device *serdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2254,8 +2228,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) GPIOD_IN); if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855 || - data->soc_type == QCA_WCN7850)) + data->soc_type == QCA_WCN6855)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2334,7 +2307,6 @@ static void qca_serdev_remove(struct serdev_device *serdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: - case QCA_WCN7850: if (power->vregs_on) { qca_power_shutdown(&qcadev->serdev_hu); break; @@ -2527,7 +2499,6 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, - { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -- GitLab From 55a316da31c9638029dc663994ff06e6deda9bf3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:44:46 +0000 Subject: [PATCH 1400/1418] Revert "Bluetooth: qca: use switch case for soc type behavior" This reverts commit fc47ed389a884ee4a5b01f62ecae8137b41f63d7 which is commit 691d54d0f7cb14baac1ff4af210d13c0e4897e27 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I0b3cbead808d92dc25ae36021fd853ed197f15ef Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 87 +++++--------- drivers/bluetooth/btqca.h | 36 ++++++ drivers/bluetooth/hci_qca.c | 233 +++++++++--------------------------- 3 files changed, 120 insertions(+), 236 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8331090af86ea..d775402b33df3 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -604,34 +604,26 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - switch (soc_type) { - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - snprintf(config.fwname, sizeof(config.fwname), - "qca/crbtfw%02x.tlv", rom_ver); - break; - case QCA_WCN3988: + if (soc_type == QCA_WCN3988) { snprintf(config.fwname, sizeof(config.fwname), "qca/apbtfw%02x.tlv", rom_ver); - break; - case QCA_QCA6390: + } else if (qca_is_wcn399x(soc_type)) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crbtfw%02x.tlv", rom_ver); + } else if (soc_type == QCA_QCA6390) { snprintf(config.fwname, sizeof(config.fwname), "qca/htbtfw%02x.tlv", rom_ver); - break; - case QCA_WCN6750: + } else if (soc_type == QCA_WCN6750) { /* Choose mbn file by default.If mbn file is not found * then choose tlv file */ config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); - break; - case QCA_WCN6855: + } else if (soc_type == QCA_WCN6855) { snprintf(config.fwname, sizeof(config.fwname), "qca/hpbtfw%02x.tlv", rom_ver); - break; - default: + } else { snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); } @@ -647,44 +639,33 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download NVM configuration */ config.type = TLV_TYPE_NVM; - if (firmware_name) { + if (firmware_name) snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); - } else { - switch (soc_type) { - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { - snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02xu.bin", rom_ver); - } else { - snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02x.bin", rom_ver); - } - break; - case QCA_WCN3988: - snprintf(config.fwname, sizeof(config.fwname), - "qca/apnv%02x.bin", rom_ver); - break; - case QCA_QCA6390: - snprintf(config.fwname, sizeof(config.fwname), - "qca/htnv%02x.bin", rom_ver); - break; - case QCA_WCN6750: - snprintf(config.fwname, sizeof(config.fwname), - "qca/msnv%02x.bin", rom_ver); - break; - case QCA_WCN6855: + else if (soc_type == QCA_WCN3988) + snprintf(config.fwname, sizeof(config.fwname), + "qca/apnv%02x.bin", rom_ver); + else if (qca_is_wcn399x(soc_type)) { + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), - "qca/hpnv%02x.bin", rom_ver); - break; - - default: + "qca/crnv%02xu.bin", rom_ver); + } else { snprintf(config.fwname, sizeof(config.fwname), - "qca/nvm_%08x.bin", soc_ver); + "qca/crnv%02x.bin", rom_ver); } } + else if (soc_type == QCA_QCA6390) + snprintf(config.fwname, sizeof(config.fwname), + "qca/htnv%02x.bin", rom_ver); + else if (soc_type == QCA_WCN6750) + snprintf(config.fwname, sizeof(config.fwname), + "qca/msnv%02x.bin", rom_ver); + else if (soc_type == QCA_WCN6855) + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); + else + snprintf(config.fwname, sizeof(config.fwname), + "qca/nvm_%08x.bin", soc_ver); err = qca_download_firmware(hdev, &config, soc_type, rom_ver); if (err < 0) { @@ -692,24 +673,16 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - switch (soc_type) { - case QCA_WCN3991: - case QCA_QCA6390: - case QCA_WCN6750: - case QCA_WCN6855: + if (soc_type >= QCA_WCN3991) { err = qca_disable_soc_logging(hdev); if (err < 0) return err; - break; - default: - break; } /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the * VsMsftOpCode. */ switch (soc_type) { - case QCA_WCN3988: case QCA_WCN3990: case QCA_WCN3991: case QCA_WCN3998: diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index fe51c632d7720..fc6cf314eb0ef 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -161,6 +161,27 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); +static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) +{ + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + return true; + default: + return false; + } +} +static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) +{ + return soc_type == QCA_WCN6750; +} +static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) +{ + return soc_type == QCA_WCN6855; +} + #else static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) @@ -188,6 +209,21 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } +static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) +{ + return false; +} + +static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) +{ + return false; +} + +static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) +{ + return false; +} + static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e6ead996948a8..746eb096c037c 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -606,18 +606,9 @@ static int qca_open(struct hci_uart *hu) if (hu->serdev) { qcadev = serdev_device_get_drvdata(hu->serdev); - switch (qcadev->btsoc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: + if (qca_is_wcn399x(qcadev->btsoc_type) || + qca_is_wcn6750(qcadev->btsoc_type)) hu->init_speed = qcadev->init_speed; - break; - - default: - break; - } if (qcadev->oper_speed) hu->oper_speed = qcadev->oper_speed; @@ -1323,19 +1314,12 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); /* Give the controller time to process the request */ - switch (qca_soc_type(hu)) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: + if (qca_is_wcn399x(qca_soc_type(hu)) || + qca_is_wcn6750(qca_soc_type(hu)) || + qca_is_wcn6855(qca_soc_type(hu))) usleep_range(1000, 10000); - break; - - default: + else msleep(300); - } return 0; } @@ -1408,19 +1392,13 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { - switch (qca_soc_type(hu)) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: + if (qca_is_wcn399x(qca_soc_type(hu)) || + qca_is_wcn6750(qca_soc_type(hu)) || + qca_is_wcn6855(qca_soc_type(hu))) { if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; - break; - - default: + } else { if (!qca_get_speed(hu, QCA_INIT_SPEED) || !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1449,28 +1427,14 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) /* Disable flow control for wcn3990 to deassert RTS while * changing the baudrate of chip and host. */ - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: + if (qca_is_wcn399x(soc_type) || + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) hci_uart_set_flow_control(hu, true); - break; - default: - break; - } - - switch (soc_type) { - case QCA_WCN3990: + if (soc_type == QCA_WCN3990) { reinit_completion(&qca->drop_ev_comp); set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); - break; - - default: - break; } qca_baudrate = qca_get_baudrate_value(speed); @@ -1482,22 +1446,12 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) host_set_baudrate(hu, speed); error: - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: + if (qca_is_wcn399x(soc_type) || + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) hci_uart_set_flow_control(hu, false); - break; - default: - break; - } - - switch (soc_type) { - case QCA_WCN3990: + if (soc_type == QCA_WCN3990) { /* Wait for the controller to send the vendor event * for the baudrate change command. */ @@ -1509,10 +1463,6 @@ error: } clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); - break; - - default: - break; } } @@ -1674,20 +1624,12 @@ static int qca_regulator_init(struct hci_uart *hu) } } - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: + if (qca_is_wcn399x(soc_type)) { /* Forcefully enable wcn399x to enter in to boot mode. */ host_set_baudrate(hu, 2400); ret = qca_send_power_pulse(hu, false); if (ret) return ret; - break; - - default: - break; } /* For wcn6750 need to enable gpio bt_en */ @@ -1704,18 +1646,10 @@ static int qca_regulator_init(struct hci_uart *hu) qca_set_speed(hu, QCA_INIT_SPEED); - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: + if (qca_is_wcn399x(soc_type)) { ret = qca_send_power_pulse(hu, true); if (ret) return ret; - break; - - default: - break; } /* Now the device is in ready state to communicate with host. @@ -1749,17 +1683,11 @@ static int qca_power_on(struct hci_dev *hdev) if (!hu->serdev) return 0; - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: + if (qca_is_wcn399x(soc_type) || + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) { ret = qca_regulator_init(hu); - break; - - default: + } else { qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 1); @@ -1782,7 +1710,6 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; - const char *soc_name; ret = qca_check_speeds(hu); if (ret) @@ -1797,26 +1724,10 @@ static int qca_setup(struct hci_uart *hu) */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - soc_name = "wcn399x"; - break; - - case QCA_WCN6750: - soc_name = "wcn6750"; - break; - - case QCA_WCN6855: - soc_name = "wcn6855"; - break; - - default: - soc_name = "ROME/QCA6390"; - } - bt_dev_info(hdev, "setting up %s", soc_name); + bt_dev_info(hdev, "setting up %s", + qca_is_wcn399x(soc_type) ? "wcn399x" : + (soc_type == QCA_WCN6750) ? "wcn6750" : + (soc_type == QCA_WCN6855) ? "wcn6855" : "ROME/QCA6390"); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1827,22 +1738,16 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: + if (qca_is_wcn399x(soc_type) || + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) { set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) goto out; - break; - - default: + } else { qca_set_speed(hu, QCA_INIT_SPEED); } @@ -1856,16 +1761,9 @@ retry: qca_baudrate = qca_get_baudrate_value(speed); } - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: - break; - - default: + if (!(qca_is_wcn399x(soc_type) || + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type))) { /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -2041,18 +1939,11 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: + if (qca_is_wcn399x(soc_type)) { host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - break; - - case QCA_WCN6750: - case QCA_WCN6855: + } else if (soc_type == QCA_WCN6750 || soc_type == QCA_WCN6855) { gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -2060,9 +1951,7 @@ static void qca_power_shutdown(struct hci_uart *hu) sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); } - break; - - default: + } else if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2187,18 +2076,11 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); - if (data) + if (data && + (qca_is_wcn399x(data->soc_type) || + qca_is_wcn6750(data->soc_type) || + qca_is_wcn6855(data->soc_type))) { qcadev->btsoc_type = data->soc_type; - else - qcadev->btsoc_type = QCA_ROME; - - switch (qcadev->btsoc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2242,9 +2124,12 @@ static int qca_serdev_probe(struct serdev_device *serdev) BT_ERR("wcn3990 serdev registration failed"); return err; } - break; + } else { + if (data) + qcadev->btsoc_type = data->soc_type; + else + qcadev->btsoc_type = QCA_ROME; - default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR_OR_NULL(qcadev->bt_en)) { @@ -2300,23 +2185,13 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct qca_power *power = qcadev->bt_power; - switch (qcadev->btsoc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - case QCA_WCN6750: - case QCA_WCN6855: - if (power->vregs_on) { - qca_power_shutdown(&qcadev->serdev_hu); - break; - } - fallthrough; - - default: - if (qcadev->susclk) - clk_disable_unprepare(qcadev->susclk); - } + if ((qca_is_wcn399x(qcadev->btsoc_type) || + qca_is_wcn6750(qcadev->btsoc_type) || + qca_is_wcn6855(qcadev->btsoc_type)) && + power->vregs_on) + qca_power_shutdown(&qcadev->serdev_hu); + else if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); hci_uart_unregister_device(&qcadev->serdev_hu); } -- GitLab From b904eebfd45029b20f9f312b430f2a972ec3c67b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:45:26 +0000 Subject: [PATCH 1401/1418] Revert "Bluetooth: btqca: Add WCN3988 support" This reverts commit 940963613275a39fd693fb1969c1c6fbc0798a21 which is commit f904feefe60c28b6852d5625adc4a2c39426a2d9 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I1ca051a8ba0f510b7c48a13856ed54859d09f67c Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 13 ++----------- drivers/bluetooth/btqca.h | 12 ++---------- drivers/bluetooth/hci_qca.c | 12 ------------ 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d775402b33df3..d40a6041c48cd 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -594,20 +594,14 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Firmware files to download are based on ROM version. * ROM version is derived from last two bytes of soc_ver. */ - if (soc_type == QCA_WCN3988) - rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); - else - rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); + rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); if (soc_type == QCA_WCN6750) qca_send_patch_config_cmd(hdev); /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (soc_type == QCA_WCN3988) { - snprintf(config.fwname, sizeof(config.fwname), - "qca/apbtfw%02x.tlv", rom_ver); - } else if (qca_is_wcn399x(soc_type)) { + if (qca_is_wcn399x(soc_type)) { snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); } else if (soc_type == QCA_QCA6390) { @@ -642,9 +636,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, if (firmware_name) snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); - else if (soc_type == QCA_WCN3988) - snprintf(config.fwname, sizeof(config.fwname), - "qca/apnv%02x.bin", rom_ver); else if (qca_is_wcn399x(soc_type)) { if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index fc6cf314eb0ef..b884095bcd9d0 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -142,7 +142,6 @@ enum qca_btsoc_type { QCA_INVALID = -1, QCA_AR3002, QCA_ROME, - QCA_WCN3988, QCA_WCN3990, QCA_WCN3998, QCA_WCN3991, @@ -163,15 +162,8 @@ int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - return true; - default: - return false; - } + return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 || + soc_type == QCA_WCN3998; } static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) { diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 746eb096c037c..f217c2821b9fb 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1832,17 +1832,6 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; -static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { - .soc_type = QCA_WCN3988, - .vregs = (struct qca_vreg []) { - { "vddio", 15000 }, - { "vddxo", 80000 }, - { "vddrf", 300000 }, - { "vddch0", 450000 }, - }, - .num_vregs = 4, -}; - static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { @@ -2368,7 +2357,6 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,qca6174-bt" }, { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, { .compatible = "qcom,qca9377-bt" }, - { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988}, { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990}, { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, -- GitLab From d9340ec8dd40f61c9239c677cad7cdd0780c8212 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:45:48 +0000 Subject: [PATCH 1402/1418] Revert "Bluetooth: btqca: use le32_to_cpu for ver.soc_id" This reverts commit 29059d0f3bc21f76db5a375a70a449ba86f3d6cc which is commit 8153b738bc547878a017889d2b1cf8dd2de0e0c6 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I8399038f8d89a402d22d1071e4a503eab05d6174 Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d40a6041c48cd..4cb541096b934 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -637,7 +637,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); else if (qca_is_wcn399x(soc_type)) { - if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + if (ver.soc_id == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), "qca/crnv%02xu.bin", rom_ver); } else { -- GitLab From a3778f3e132fa15caf13710833cad1387bf873e2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:46:12 +0000 Subject: [PATCH 1403/1418] Revert "Bluetooth: hci_qca: Add support for QTI Bluetooth chip wcn6855" This reverts commit e5383662fd02ad9516ae2c27f85cd56296372ba9 which is commit 095327fede005f4b14d40b2183b2f7965c739dbd upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I705d791c9553d22c3c1d286c6c77ead07e0b1428 Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 14 +-------- drivers/bluetooth/btqca.h | 10 ------- drivers/bluetooth/hci_qca.c | 57 +++++++++---------------------------- 3 files changed, 15 insertions(+), 66 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4cb541096b934..d7d0c9de3dc31 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -614,9 +614,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); - } else if (soc_type == QCA_WCN6855) { - snprintf(config.fwname, sizeof(config.fwname), - "qca/hpbtfw%02x.tlv", rom_ver); } else { snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); @@ -651,9 +648,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, else if (soc_type == QCA_WCN6750) snprintf(config.fwname, sizeof(config.fwname), "qca/msnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6855) - snprintf(config.fwname, sizeof(config.fwname), - "qca/hpnv%02x.bin", rom_ver); else snprintf(config.fwname, sizeof(config.fwname), "qca/nvm_%08x.bin", soc_ver); @@ -691,17 +685,11 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - switch (soc_type) { - case QCA_WCN3991: - case QCA_WCN6750: - case QCA_WCN6855: + if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) { /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) return err; - break; - default: - break; } bt_dev_info(hdev, "QCA setup on UART is completed"); diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index b884095bcd9d0..61e9a50e66ae1 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -147,7 +147,6 @@ enum qca_btsoc_type { QCA_WCN3991, QCA_QCA6390, QCA_WCN6750, - QCA_WCN6855, }; #if IS_ENABLED(CONFIG_BT_QCA) @@ -169,10 +168,6 @@ static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) { return soc_type == QCA_WCN6750; } -static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6855; -} #else @@ -211,11 +206,6 @@ static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) return false; } -static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) -{ - return false; -} - static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index f217c2821b9fb..0e908a337e534 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1315,8 +1315,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) /* Give the controller time to process the request */ if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu)) || - qca_is_wcn6855(qca_soc_type(hu))) + qca_is_wcn6750(qca_soc_type(hu))) usleep_range(1000, 10000); else msleep(300); @@ -1393,8 +1392,7 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu)) || - qca_is_wcn6855(qca_soc_type(hu))) { + qca_is_wcn6750(qca_soc_type(hu))) { if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1428,8 +1426,7 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) * changing the baudrate of chip and host. */ if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) + qca_is_wcn6750(soc_type)) hci_uart_set_flow_control(hu, true); if (soc_type == QCA_WCN3990) { @@ -1447,8 +1444,7 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) error: if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) + qca_is_wcn6750(soc_type)) hci_uart_set_flow_control(hu, false); if (soc_type == QCA_WCN3990) { @@ -1684,8 +1680,7 @@ static int qca_power_on(struct hci_dev *hdev) return 0; if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) { + qca_is_wcn6750(soc_type)) { ret = qca_regulator_init(hu); } else { qcadev = serdev_device_get_drvdata(hu->serdev); @@ -1726,8 +1721,7 @@ static int qca_setup(struct hci_uart *hu) bt_dev_info(hdev, "setting up %s", qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : - (soc_type == QCA_WCN6855) ? "wcn6855" : "ROME/QCA6390"); + (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390"); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1739,8 +1733,7 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) { + qca_is_wcn6750(soc_type)) { set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -1762,8 +1755,7 @@ retry: } if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type))) { + qca_is_wcn6750(soc_type))) { /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1889,20 +1881,6 @@ static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn6855 = { - .soc_type = QCA_WCN6855, - .vregs = (struct qca_vreg []) { - { "vddio", 5000 }, - { "vddbtcxmx", 126000 }, - { "vddrfacmn", 12500 }, - { "vddrfa0p8", 102000 }, - { "vddrfa1p7", 302000 }, - { "vddrfa1p2", 257000 }, - }, - .num_vregs = 6, - .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, -}; - static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -1932,7 +1910,7 @@ static void qca_power_shutdown(struct hci_uart *hu) host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750 || soc_type == QCA_WCN6855) { + } else if (soc_type == QCA_WCN6750) { gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -2067,8 +2045,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (data && (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type) || - qca_is_wcn6855(data->soc_type))) { + qca_is_wcn6750(data->soc_type))) { qcadev->btsoc_type = data->soc_type; qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), @@ -2088,18 +2065,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && - (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) { + if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); power_ctrl_enabled = false; } qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && - (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) + if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2175,9 +2148,8 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_power *power = qcadev->bt_power; if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type) || - qca_is_wcn6855(qcadev->btsoc_type)) && - power->vregs_on) + qca_is_wcn6750(qcadev->btsoc_type)) && + power->vregs_on) qca_power_shutdown(&qcadev->serdev_hu); else if (qcadev->susclk) clk_disable_unprepare(qcadev->susclk); @@ -2361,7 +2333,6 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, - { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -- GitLab From 3011e898b23c8ab431781694b8f957f8159e796e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:46:33 +0000 Subject: [PATCH 1404/1418] Revert "Bluetooth: hci_qca: mark OF related data as maybe unused" This reverts commit eb7b5777d3c7f5dbbb0736f638068f50006d81b0 which is commit 44fac8a2fd2f72ee98ee41e6bc9ecc7765b5d3cc upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I02cc1a461bc2e554cd4aa435a6b3bf25dfcfcfd8 Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0e908a337e534..76ceb8a0183d1 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1824,7 +1824,7 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; -static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { +static const struct qca_device_data qca_soc_data_wcn3990 = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1835,7 +1835,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { +static const struct qca_device_data qca_soc_data_wcn3991 = { .soc_type = QCA_WCN3991, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1847,7 +1847,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { +static const struct qca_device_data qca_soc_data_wcn3998 = { .soc_type = QCA_WCN3998, .vregs = (struct qca_vreg []) { { "vddio", 10000 }, @@ -1858,13 +1858,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { +static const struct qca_device_data qca_soc_data_qca6390 = { .soc_type = QCA_QCA6390, .num_vregs = 0, .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { +static const struct qca_device_data qca_soc_data_wcn6750 = { .soc_type = QCA_WCN6750, .vregs = (struct qca_vreg []) { { "vddio", 5000 }, -- GitLab From 2e193db1b464fca8a609934d9b476b4e7ab51532 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:48:23 +0000 Subject: [PATCH 1405/1418] Revert "scsi: sd: usb_storage: uas: Access media prior to querying device properties" This reverts commit b73dd5f9997279715cd450ee8ca599aaff2eabb9 which is commit 321da3dc1f3c92a12e3c5da934090d2992a8814c upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I54aee6b89e0350b57183b081535ccc4f6d0a8638 Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 26 +------------------------- drivers/usb/storage/scsiglue.c | 7 ------- drivers/usb/storage/uas.c | 7 ------- include/scsi/scsi_device.h | 1 - 4 files changed, 1 insertion(+), 40 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 10bf69665c6b4..0efc659b83368 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3226,24 +3226,6 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } -static void sd_read_block_zero(struct scsi_disk *sdkp) -{ - unsigned int buf_len = sdkp->device->sector_size; - char *buffer, cmd[10] = { }; - - buffer = kmalloc(buf_len, GFP_KERNEL); - if (!buffer) - return; - - cmd[0] = READ_10; - put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ - put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ - - scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, - SD_TIMEOUT, sdkp->max_retries, NULL); - kfree(buffer); -} - /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3283,13 +3265,7 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - /* - * Some USB/UAS devices return generic values for mode pages - * until the media has been accessed. Trigger a READ operation - * to force the device to populate mode pages. - */ - if (sdp->read_before_ms) - sd_read_block_zero(sdkp); + /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 12cf9940e5b67..c54e9805da536 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,13 +179,6 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; - /* - * Some devices report generic values until the media has been - * accessed. Force a READ(10) prior to querying device - * characteristics. - */ - sdev->read_before_ms = 1; - /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index ed22053b3252f..de3836412bf32 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -878,13 +878,6 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; - /* - * Some devices report generic values until the media has been - * accessed. Force a READ(10) prior to querying device - * characteristics. - */ - sdev->read_before_ms = 1; - /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 7fca6e8421d63..7d894e474a078 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -179,7 +179,6 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ - unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- GitLab From 045f29a0bdcaeeea6e000797306287cab98879b2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:49:24 +0000 Subject: [PATCH 1406/1418] Revert "usb: gadget: Properly configure the device for remote wakeup" This reverts commit ed9fdc82cafbcf8a46b55d315219bf9464621bca which is commit b93c2a68f3d9dc98ec30dcb342ae47c1c8d09d18 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Ib55f9062147895c79170426d3455dcef12d0f25f Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 18 ------------------ drivers/usb/gadget/configfs.c | 3 --- drivers/usb/gadget/udc/core.c | 27 --------------------------- drivers/usb/gadget/udc/trace.h | 5 ----- include/linux/usb/composite.h | 2 -- include/linux/usb/gadget.h | 8 -------- 6 files changed, 63 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 247cca46cdfae..cb0a4e2cdbb73 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -511,19 +511,6 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, return min(val, 900U) / 8; } -void check_remote_wakeup_config(struct usb_gadget *g, - struct usb_configuration *c) -{ - if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) { - /* Reset the rw bit if gadget is not capable of it */ - if (!g->wakeup_capable && g->ops->set_remote_wakeup) { - WARN(c->cdev, "Clearing wakeup bit for config c.%d\n", - c->bConfigurationValue); - c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP; - } - } -} - static int config_buf(struct usb_configuration *config, enum usb_device_speed speed, void *buf, u8 type) { @@ -972,11 +959,6 @@ static int set_config(struct usb_composite_dev *cdev, power = min(power, 500U); else power = min(power, 900U); - - if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) - usb_gadget_set_remote_wakeup(gadget, 1); - else - usb_gadget_set_remote_wakeup(gadget, 0); done: if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) usb_gadget_set_selfpowered(gadget); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index fdcdab8b6f50b..96136bdcd3283 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1408,9 +1408,6 @@ static int configfs_composite_bind(struct usb_gadget *gadget, if (gadget_is_otg(gadget)) c->descriptors = otg_desc; - /* Properly configure the bmAttributes wakeup bit */ - check_remote_wakeup_config(gadget, c); - cfg = container_of(c, struct config_usb_cfg, c); if (!list_empty(&cfg->string_list)) { i = 0; diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index e0cea717782ef..1f56b770465ed 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -525,33 +525,6 @@ out: } EXPORT_SYMBOL_GPL(usb_gadget_wakeup); -/** - * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. - * @gadget:the device being configured for remote wakeup - * @set:value to be configured. - * - * set to one to enable remote wakeup feature and zero to disable it. - * - * returns zero on success, else negative errno. - */ -int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) -{ - int ret = 0; - - if (!gadget->ops->set_remote_wakeup) { - ret = -EOPNOTSUPP; - goto out; - } - - ret = gadget->ops->set_remote_wakeup(gadget, set); - -out: - trace_usb_gadget_set_remote_wakeup(gadget, ret); - - return ret; -} -EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); - /** * usb_gadget_set_selfpowered - sets the device selfpowered feature. * @gadget:the device being declared as self-powered diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h index a5ed26fbc2dad..abdbcb1bacb0b 100644 --- a/drivers/usb/gadget/udc/trace.h +++ b/drivers/usb/gadget/udc/trace.h @@ -91,11 +91,6 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_ARGS(g, ret) ); -DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, - TP_PROTO(struct usb_gadget *g, int ret), - TP_ARGS(g, ret) -); - DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 1c46a01042248..814f657992007 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -412,8 +412,6 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite, extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, struct usb_ep *ep0); void composite_dev_cleanup(struct usb_composite_dev *cdev); -void check_remote_wakeup_config(struct usb_gadget *g, - struct usb_configuration *c); static inline struct usb_composite_driver *to_cdriver( struct usb_gadget_driver *gdrv) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index cc56814e047ff..a10c1a9971b4a 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -315,7 +315,6 @@ struct usb_udc; struct usb_gadget_ops { int (*get_frame)(struct usb_gadget *); int (*wakeup)(struct usb_gadget *); - int (*set_remote_wakeup)(struct usb_gadget *, int set); int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); int (*vbus_session) (struct usb_gadget *, int is_active); int (*vbus_draw) (struct usb_gadget *, unsigned mA); @@ -395,8 +394,6 @@ struct usb_gadget_ops { * @connected: True if gadget is connected. * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag * indicates that it supports LPM as per the LPM ECN & errata. - * @wakeup_capable: True if gadget is capable of sending remote wakeup. - * @wakeup_armed: True if gadget is armed by the host for remote wakeup. * @irq: the interrupt number for device controller. * @id_number: a unique ID number for ensuring that gadget names are distinct * @@ -458,8 +455,6 @@ struct usb_gadget { unsigned deactivated:1; unsigned connected:1; unsigned lpm_capable:1; - unsigned wakeup_capable:1; - unsigned wakeup_armed:1; int irq; int id_number; }; @@ -616,7 +611,6 @@ static inline int gadget_is_otg(struct usb_gadget *g) #if IS_ENABLED(CONFIG_USB_GADGET) int usb_gadget_frame_number(struct usb_gadget *gadget); int usb_gadget_wakeup(struct usb_gadget *gadget); -int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); int usb_gadget_set_selfpowered(struct usb_gadget *gadget); int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); int usb_gadget_vbus_connect(struct usb_gadget *gadget); @@ -632,8 +626,6 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_wakeup(struct usb_gadget *gadget) { return 0; } -static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) -{ return 0; } static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) -- GitLab From ff903561b7d7c6c608da813193ea360ddc36afa9 Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Thu, 28 Mar 2024 11:59:13 +0100 Subject: [PATCH 1407/1418] UPSTREAM: tools/resolve_btfids: fix build with musl libc commit 62248b22d01e96a4d669cde0d7005bd51ebf9e76 upstream. Include the header that defines u32. This fixes build of 6.6.23 and 6.1.83 kernels for Alpine Linux, which uses musl libc. I assume that GNU libc indirecly pulls in linux/types.h. Fixes: 9707ac4fe2f5 ("tools/resolve_btfids: Refactor set sorting with types from btf_ids.h") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218647 Cc: stable@vger.kernel.org Change-Id: I34cd9d80bc1623c0caa1c2f894710e6640b4c0bc Signed-off-by: Natanael Copa Tested-by: Greg Thelen Link: https://lore.kernel.org/r/20240328110103.28734-1-ncopa@alpinelinux.org Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 7b970a145c90260dee678065a9da97c90863a0ef) Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/btf_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 72535f00572f6..72ea363d434db 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; -- GitLab From 0ba60e394b4579f5a84166039bc3073d5af28bd7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 13:41:05 +0000 Subject: [PATCH 1408/1418] Revert "Bluetooth: Fix eir name length" This reverts commit 262a77d85e8b448ac83eb4e882e0f63056b19f21 which is commit 2ab3e8d67fc1d4a7638b769cf83023ec209fc0a9 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Iaeeb3fa9c2f11e4e24d212a7c703cda24c01fa65 Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/eir.c | 29 ++++++++++++++++++++++------- net/bluetooth/mgmt.c | 2 +- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 1bc51e2b05a34..9214189279e80 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -13,33 +13,48 @@ #define PNP_INFO_SVCLASS_ID 0x1200 +static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) +{ + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + /* If data is already NULL terminated just pass it directly */ + if (data[data_len - 1] == '\0') + return eir_append_data(eir, eir_len, type, data, data_len); + + memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(eir, eir_len, type, name, sizeof(name)); +} + u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { size_t short_len; size_t complete_len; - /* no space left for name (+ type + len) */ - if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 2) + /* no space left for name (+ NULL + type + len) */ + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) return ad_len; /* use complete name if present and fits */ complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len); + return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len + 1); /* use short name if present */ short_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); if (short_len) - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, hdev->short_name, - short_len); + short_len == HCI_MAX_SHORT_NAME_LENGTH ? + short_len : short_len + 1); /* use shortened full name if present, we already know that name * is longer then HCI_MAX_SHORT_NAME_LENGTH */ if (complete_len) - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 716f6dc4934b7..21c0924787e22 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8465,7 +8465,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, static u8 calculate_name_len(struct hci_dev *hdev) { - u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */ + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; return eir_append_local_name(hdev, buf, 0); } -- GitLab From 7386876ffd0bd0f3a2bc493a35bab85b386f4dd0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 13:41:32 +0000 Subject: [PATCH 1409/1418] Revert "Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH" This reverts commit 99f30e12e588f9982a6eb1916e53510bff25b3b8 which is commit db08722fc7d46168fe31d9b8a7b29229dd959f9f upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I8de43981e4a64a6484e85d0e2f3ad77f0dd11f23 Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 6 +++--- net/bluetooth/eir.c | 2 +- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3072f758a6bed..21fa6d0670b75 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -82,7 +82,7 @@ struct discovery_state { u8 last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; - u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH]; + u8 last_adv_data[HCI_MAX_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; @@ -294,7 +294,7 @@ struct adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[HCI_MAX_EXT_AD_LENGTH]; + __u8 value[HCI_MAX_AD_LENGTH]; }; struct adv_rssi_thresholds { @@ -733,7 +733,7 @@ struct hci_conn { __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; - __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; + __u8 le_adv_data[HCI_MAX_AD_LENGTH]; __u8 le_adv_data_len; __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; __u8 le_per_adv_data_len; diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 9214189279e80..8a85f6cdfbc16 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -33,7 +33,7 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) size_t complete_len; /* no space left for name (+ NULL + type + len) */ - if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) return ad_len; /* use complete name if present and fits */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 21c0924787e22..1486fb9bb78f7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5378,9 +5378,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_EXT_AD_LENGTH || - length > HCI_MAX_EXT_AD_LENGTH || - (offset + length) > HCI_MAX_EXT_AD_LENGTH) + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); -- GitLab From c0dded0f8b1510c1b5ef3554a044de37567ace78 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 13:43:47 +0000 Subject: [PATCH 1410/1418] Revert "Bluetooth: fix use-after-free in accessing skb after sending it" This reverts commit 715264ad09fd4004e347cdb79fa58a4f2344f13f which is commit 947ec0d002dce8577b655793dcc6fc78d67b7cb6 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Icc979230a67566ee6a64cee43c91ec710c53c019 Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 050fae54946bd..824dde465dd18 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4174,7 +4174,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) if (hci_req_status_pend(hdev) && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + hdev->req_skb = skb_clone(skb, GFP_KERNEL); } atomic_dec(&hdev->cmd_cnt); -- GitLab From 6147e04a86231bc771f35448c6bb89947c292f18 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 13:44:20 +0000 Subject: [PATCH 1411/1418] Revert "Bluetooth: hci_sync: Fix overwriting request callback" This reverts commit da77c1d39bc527b31890bfa0405763c82828defb which is commit 2615fd9a7c2507eb3be3fbe49dcec88a2f56454a upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I4250b5552873f25a1a0dec32159511eeb96aac9a Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 46 ++++++++++---------------------- net/bluetooth/hci_event.c | 18 ++++++------- net/bluetooth/hci_sync.c | 21 +++------------ 5 files changed, 27 insertions(+), 61 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 21fa6d0670b75..bab16a3fd6470 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -550,7 +550,6 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; - struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index bac5a369d2bef..f752a9f9bb9c7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2813,7 +2813,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, ECANCELED); + hci_cmd_sync_cancel(hdev, -ECANCELED); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 824dde465dd18..af6d3af25a20c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1491,8 +1491,8 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); - if (hdev->req_skb) { - u16 opcode = hci_skb_opcode(hdev->req_skb); + if (hdev->sent_cmd) { + u16 opcode = hci_skb_opcode(hdev->sent_cmd); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); @@ -2790,7 +2790,6 @@ void hci_release_dev(struct hci_dev *hdev) ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); - kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } @@ -3120,33 +3119,21 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ -static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { struct hci_command_hdr *hdr; - if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) + if (!hdev->sent_cmd) return NULL; - hdr = (void *)skb->data; + hdr = (void *) hdev->sent_cmd->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - return skb->data + HCI_COMMAND_HDR_SIZE; -} + BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); -/* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) -{ - void *data; - - /* Check if opcode matches last sent command */ - data = hci_cmd_data(hdev->sent_cmd, opcode); - if (!data) - /* Check if opcode matches last request */ - data = hci_cmd_data(hdev->req_skb, opcode); - - return data; + return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; } /* Get data from last received event */ @@ -4042,19 +4029,17 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, if (!status && !hci_req_is_complete(hdev)) return; - skb = hdev->req_skb; - /* If this was the last command in a request the complete - * callback would be found in hdev->req_skb instead of the + * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { - *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } - if (skb && bt_cb(skb)->hci.req_complete) { - *req_complete = bt_cb(skb)->hci.req_complete; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } @@ -4171,11 +4156,8 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (hci_req_status_pend(hdev) && - !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { - kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(skb, GFP_KERNEL); - } + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); atomic_dec(&hdev->cmd_cnt); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b150dee88f35c..4ddffca04774e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4329,7 +4329,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of commands there will not be a command * complete event). */ - if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) { + if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { @@ -7149,10 +7149,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); /* Only match event if command OGF is for LE */ - if (hdev->req_skb && - hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && - hci_skb_event(hdev->req_skb) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->req_skb); + if (hdev->sent_cmd && + hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && + hci_skb_event(hdev->sent_cmd) == ev->subevent) { + *opcode = hci_skb_opcode(hdev->sent_cmd); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, req_complete_skb); } @@ -7539,10 +7539,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) } /* Only match event if command OGF is not for LE */ - if (hdev->req_skb && - hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 && - hci_skb_event(hdev->req_skb) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb), + if (hdev->sent_cmd && + hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && + hci_skb_event(hdev->sent_cmd) == event) { + hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), status, &req_complete, &req_complete_skb); req_evt = event; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 65b2ad34179f8..d0029f10d9023 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -31,10 +31,6 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - /* Free the request command so it is not used as response */ - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - if (skb) { struct sock *sk = hci_skb_sk(skb); @@ -42,7 +38,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (sk) sock_put(sk); - hdev->req_rsp = skb_get(skb); + hdev->req_skb = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); @@ -190,8 +186,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = 0; hdev->req_result = 0; - skb = hdev->req_rsp; - hdev->req_rsp = NULL; + skb = hdev->req_skb; + hdev->req_skb = NULL; bt_dev_dbg(hdev, "end: err %d", err); @@ -4883,11 +4879,6 @@ int hci_dev_open_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } - if (hdev->req_skb) { - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - } - clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5049,12 +5040,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } - /* Drop last request */ - if (hdev->req_skb) { - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - } - clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); -- GitLab From 50619421eada617acc8f4f37013557e1107a70c8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 14:11:25 +0000 Subject: [PATCH 1412/1418] Revert "Bluetooth: hci_core: Cancel request on command timeout" This reverts commit 1b6cfa4c760e5f3729e21b725c89f6ddb9be5abf which is commit 63298d6e752fc0ec7f5093860af8bc9f047b30c8 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I63e22487a918e73d667b231128efcfceb48f528b Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_sync.h | 2 +- net/bluetooth/hci_core.c | 86 +++++++++++--------------------- net/bluetooth/hci_request.c | 2 +- net/bluetooth/hci_sync.c | 20 ++++---- net/bluetooth/mgmt.c | 2 +- 5 files changed, 40 insertions(+), 72 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 59d15b1a978ab..2fa976c466b80 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -39,7 +39,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); -void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err); +void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index af6d3af25a20c..458f20f53f8e9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1492,11 +1492,10 @@ static void hci_cmd_timeout(struct work_struct *work) cmd_timer.work); if (hdev->sent_cmd) { - u16 opcode = hci_skb_opcode(hdev->sent_cmd); + struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; + u16 opcode = __le16_to_cpu(sent->opcode); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); - - hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } @@ -2821,23 +2820,6 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) return ret; } -/* Cancel ongoing command synchronously: - * - * - Cancel command timer - * - Reset command counter - * - Cancel command request - */ -static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) -{ - bt_dev_dbg(hdev, "err 0x%2.2x", err); - - cancel_delayed_work_sync(&hdev->cmd_timer); - cancel_delayed_work_sync(&hdev->ncmd_timer); - atomic_set(&hdev->cmd_cnt, 1); - - hci_cmd_sync_cancel_sync(hdev, -err); -} - /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { @@ -2855,7 +2837,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - hci_cancel_cmd_sync(hdev, -EHOSTDOWN); + __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4135,33 +4117,6 @@ static void hci_rx_work(struct work_struct *work) } } -static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) -{ - int err; - - bt_dev_dbg(hdev, "skb %p", skb); - - kfree_skb(hdev->sent_cmd); - - hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); - if (!hdev->sent_cmd) { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - return; - } - - err = hci_send_frame(hdev, skb); - if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, err); - return; - } - - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); - - atomic_dec(&hdev->cmd_cnt); -} - static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); @@ -4176,15 +4131,30 @@ static void hci_cmd_work(struct work_struct *work) if (!skb) return; - hci_send_cmd_sync(hdev, skb); - - rcu_read_lock(); - if (test_bit(HCI_RESET, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - cancel_delayed_work(&hdev->cmd_timer); - else - queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, - HCI_CMD_TIMEOUT); - rcu_read_unlock(); + kfree_skb(hdev->sent_cmd); + + hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); + if (hdev->sent_cmd) { + int res; + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); + atomic_dec(&hdev->cmd_cnt); + + res = hci_send_frame(hdev, skb); + if (res < 0) + __hci_cmd_sync_cancel(hdev, -res); + + rcu_read_lock(); + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + cancel_delayed_work(&hdev->cmd_timer); + else + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); + } else { + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); + } } } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 4468647df6722..f7e006a363829 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -916,7 +916,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { - hci_cmd_sync_cancel_sync(hdev, ENODEV); + __hci_cmd_sync_cancel(hdev, ENODEV); cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d0029f10d9023..c03729c10fdd6 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -651,7 +651,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) mutex_unlock(&hdev->cmd_sync_work_lock); } -void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -659,17 +659,15 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); + cancel_delayed_work_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->ncmd_timer); + atomic_set(&hdev->cmd_cnt, 1); + + wake_up_interruptible(&hdev->req_wait_q); } } -EXPORT_SYMBOL(hci_cmd_sync_cancel); -/* Cancel ongoing command request synchronously: - * - * - Set result and mark status to HCI_REQ_CANCELED - * - Wakeup command sync thread - */ -void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -677,10 +675,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - wake_up_interruptible(&hdev->req_wait_q); + queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } } -EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); +EXPORT_SYMBOL(hci_cmd_sync_cancel); /* Submit HCI command to be run in as cmd_sync_work: * diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1486fb9bb78f7..efeea7994d22e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1403,7 +1403,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, /* Cancel potentially blocking sync operation before power off */ if (cp->val == 0x00) { - hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN); + __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); } else { -- GitLab From 229c1b5f617945cedce0dac4f64f2a7b24d19634 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 14:11:58 +0000 Subject: [PATCH 1413/1418] Revert "Bluetooth: hci_conn: Consolidate code for aborting connections" This reverts commit 6083089ab00631617f9eac678df3ab050a9d837a which is commit a13f316e90fdb1fb6df6582e845aa9b3270f3581 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: Ic482b9be644466e06baa4bc0f01a7cbf63b905fb Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 154 +++++++++++++++++++++++++------ net/bluetooth/hci_sync.c | 23 ++--- net/bluetooth/mgmt.c | 15 ++- 4 files changed, 147 insertions(+), 47 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bab16a3fd6470..357d4f5a38ac3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -745,7 +745,6 @@ struct hci_conn { unsigned long flags; enum conn_reasons conn_reason; - __u8 abort_reason; __u32 clock; __u16 clock_accuracy; @@ -765,6 +764,7 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; + struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f752a9f9bb9c7..12d36875358b9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -175,6 +175,57 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } +static void le_scan_cleanup(struct work_struct *work) +{ + struct hci_conn *conn = container_of(work, struct hci_conn, + le_scan_cleanup); + struct hci_dev *hdev = conn->hdev; + struct hci_conn *c = NULL; + + BT_DBG("%s hcon %p", hdev->name, conn); + + hci_dev_lock(hdev); + + /* Check that the hci_conn is still around */ + rcu_read_lock(); + list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { + if (c == conn) + break; + } + rcu_read_unlock(); + + if (c == conn) { + hci_connect_le_scan_cleanup(conn, 0x00); + hci_conn_cleanup(conn); + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + hci_conn_put(conn); +} + +static void hci_connect_le_scan_remove(struct hci_conn *conn) +{ + BT_DBG("%s hcon %p", conn->hdev->name, conn); + + /* We can't call hci_conn_del/hci_conn_cleanup here since that + * could deadlock with another hci_conn_del() call that's holding + * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). + * Instead, grab temporary extra references to the hci_dev and + * hci_conn and perform the necessary cleanup in a separate work + * callback. + */ + + hci_dev_hold(conn->hdev); + hci_conn_get(conn); + + /* Even though we hold a reference to the hdev, many other + * things might get cleaned up meanwhile, including the hdev's + * own workqueue, so we can't use that for scheduling. + */ + schedule_work(&conn->le_scan_cleanup); +} + static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -621,6 +672,13 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; + /* LE connections in scanning state need special handling */ + if (conn->state == BT_CONNECT && conn->type == LE_LINK && + test_bit(HCI_CONN_SCANNING, &conn->flags)) { + hci_connect_le_scan_remove(conn); + return; + } + hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } @@ -992,6 +1050,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); + INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -2778,46 +2837,81 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } -static int abort_conn_sync(struct hci_dev *hdev, void *data) +int hci_abort_conn(struct hci_conn *conn, u8 reason) { - struct hci_conn *conn; - u16 handle = PTR_ERR(data); + int r = 0; - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; - return hci_abort_conn_sync(hdev, conn, conn->abort_reason); -} + switch (conn->state) { + case BT_CONNECTED: + case BT_CONFIG: + if (conn->type == AMP_LINK) { + struct hci_cp_disconn_phy_link cp; -int hci_abort_conn(struct hci_conn *conn, u8 reason) -{ - struct hci_dev *hdev = conn->hdev; + cp.phy_handle = HCI_PHY_HANDLE(conn->handle); + cp.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, + sizeof(cp), &cp); + } else { + struct hci_cp_disconnect dc; - /* If abort_reason has already been set it means the connection is - * already being aborted so don't attempt to overwrite it. - */ - if (conn->abort_reason) - return 0; + dc.handle = cpu_to_le16(conn->handle); + dc.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, + sizeof(dc), &dc); + } - bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason); + conn->state = BT_DISCONN; - conn->abort_reason = reason; + break; + case BT_CONNECT: + if (conn->type == LE_LINK) { + if (test_bit(HCI_CONN_SCANNING, &conn->flags)) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); + } else if (conn->type == ACL_LINK) { + if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_CREATE_CONN_CANCEL, + 6, &conn->dst); + } + break; + case BT_CONNECT2: + if (conn->type == ACL_LINK) { + struct hci_cp_reject_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + rej.reason = reason; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_CONN_REQ, + sizeof(rej), &rej); + } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + struct hci_cp_reject_sync_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + + /* SCO rejection has its own limited set of + * allowed error values (0x0D-0x0F) which isn't + * compatible with most values passed to this + * function. To be safe hard-code one of the + * values that's suitable for SCO. + */ + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; - /* If the connection is pending check the command opcode since that - * might be blocking on hci_cmd_sync_work while waiting its respective - * event so we need to hci_cmd_sync_cancel to cancel it. - */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { - switch (hci_skb_event(hdev->sent_cmd)) { - case HCI_EV_LE_CONN_COMPLETE: - case HCI_EV_LE_ENHANCED_CONN_COMPLETE: - case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, -ECANCELED); - break; + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_SYNC_CONN_REQ, + sizeof(rej), &rej); } + break; + default: + conn->state = BT_CLOSED; + break; } - return hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), - NULL); + return r; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c03729c10fdd6..31dd064d77a42 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5230,27 +5230,22 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, - struct hci_conn *conn, u8 reason) + struct hci_conn *conn) { - /* Return reason if scanning since the connection shall probably be - * cleanup directly. - */ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - return reason; + return 0; - if (conn->role == HCI_ROLE_SLAVE || - test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } -static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, - u8 reason) +static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) { if (conn->type == LE_LINK) - return hci_le_connect_cancel_sync(hdev, conn, reason); + return hci_le_connect_cancel_sync(hdev, conn); if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; @@ -5303,11 +5298,9 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - err = hci_connect_cancel_sync(hdev, conn, reason); + err = hci_connect_cancel_sync(hdev, conn); /* Cleanup hci_conn object if it cannot be cancelled as it - * likelly means the controller and host stack are out of sync - * or in case of LE it was still scanning so it can be cleanup - * safely. + * likelly means the controller and host stack are out of sync. */ if (err) { hci_dev_lock(hdev); @@ -6222,7 +6215,7 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) done: if (err == -ETIMEDOUT) - hci_le_connect_cancel_sync(hdev, conn, 0x00); + hci_le_connect_cancel_sync(hdev, conn); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index efeea7994d22e..922938a7fd15b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3583,6 +3583,18 @@ unlock: return err; } +static int abort_conn_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn; + u16 handle = PTR_ERR(data); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + return 0; + + return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); +} + static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3633,7 +3645,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); unlock: hci_dev_unlock(hdev); -- GitLab From 81cc178d7c879899421408e7b568569fe3b5693c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 13:45:24 +0000 Subject: [PATCH 1414/1418] Revert "io_uring: drop any code related to SCM_RIGHTS" This reverts commit a3812a47a32022ca76bf46ddacdd823dc2aabf8b which is commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I84857293b288c8a160b2567d4018568c36893eff Signed-off-by: Greg Kroah-Hartman --- include/linux/io_uring_types.h | 3 + io_uring/filetable.c | 10 ++- io_uring/io_uring.c | 31 ++++++- io_uring/rsrc.c | 151 ++++++++++++++++++++++++++++++++- io_uring/rsrc.h | 15 ++++ 5 files changed, 202 insertions(+), 8 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 37aeea266ebb3..f5b687a787a34 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -330,6 +330,9 @@ struct io_ring_ctx { struct list_head io_buffers_pages; + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 4660cb89ea9f5..b80614e7d6051 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -95,10 +95,12 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, needs_switch = true; } - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - return 0; + ret = io_scm_file_account(ctx, file); + if (!ret) { + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + } err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0bb392e015f24..06385141319b9 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -2627,6 +2628,12 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); +#if defined(CONFIG_UNIX) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ + sock_release(ctx->ring_sock); + } +#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { @@ -3431,12 +3438,32 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. + * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, + * we have to tie this fd to a socket for file garbage collection purposes. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + struct file *file; +#if defined(CONFIG_UNIX) + int ret; + + ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, + &ctx->ring_sock); + if (ret) + return ERR_PTR(ret); +#endif + + file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); +#if defined(CONFIG_UNIX) + if (IS_ERR(file)) { + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; + } +#endif + return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index ac658cfa89c63..7ada0339b3870 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -494,6 +494,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } + err = io_scm_file_account(ctx, file); + if (err) { + fput(file); + break; + } *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); @@ -757,12 +762,22 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < ctx->nr_user_files; i++) { struct file *file = io_file_from_index(&ctx->file_table, i); - if (!file) + /* skip scm accounted files, they'll be freed by ->ring_sock */ + if (!file || io_file_need_scm(file)) continue; io_file_bitmap_clear(&ctx->file_table, i); fput(file); } +#if defined(CONFIG_UNIX) + if (ctx->ring_sock) { + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) + kfree_skb(skb); + } +#endif io_free_file_tables(&ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); @@ -790,11 +805,134 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) return ret; } +/* + * Ensure the UNIX gc is aware of our file set, so we are certain that + * the io_uring can be safely unregistered on process exit, even if we have + * loops in the file referencing. We account only files that can hold other + * files because otherwise they can't form a loop and so are not interesting + * for GC. + */ +int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) +{ +#if defined(CONFIG_UNIX) + struct sock *sk = ctx->ring_sock->sk; + struct sk_buff_head *head = &sk->sk_receive_queue; + struct scm_fp_list *fpl; + struct sk_buff *skb; + + if (likely(!io_file_need_scm(file))) + return 0; + + /* + * See if we can merge this file into an existing skb SCM_RIGHTS + * file set. If there's no room, fall back to allocating a new skb + * and filling it in. + */ + spin_lock_irq(&head->lock); + skb = skb_peek(head); + if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) + __skb_unlink(skb, head); + else + skb = NULL; + spin_unlock_irq(&head->lock); + + if (!skb) { + fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); + if (!fpl) + return -ENOMEM; + + skb = alloc_skb(0, GFP_KERNEL); + if (!skb) { + kfree(fpl); + return -ENOMEM; + } + + fpl->user = get_uid(current_user()); + fpl->max = SCM_MAX_FD; + fpl->count = 0; + + UNIXCB(skb).fp = fpl; + skb->sk = sk; + skb->scm_io_uring = 1; + skb->destructor = unix_destruct_scm; + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + } + + fpl = UNIXCB(skb).fp; + fpl->fp[fpl->count++] = get_file(file); + unix_inflight(fpl->user, file); + skb_queue_head(head, skb); + fput(file); +#endif + return 0; +} + static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { struct file *file = prsrc->file; +#if defined(CONFIG_UNIX) + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff_head list, *head = &sock->sk_receive_queue; + struct sk_buff *skb; + int i; + + if (!io_file_need_scm(file)) { + fput(file); + return; + } + + __skb_queue_head_init(&list); + + /* + * Find the skb that holds this file in its SCM_RIGHTS. When found, + * remove this entry and rearrange the file array. + */ + skb = skb_dequeue(head); + while (skb) { + struct scm_fp_list *fp; + fp = UNIXCB(skb).fp; + for (i = 0; i < fp->count; i++) { + int left; + + if (fp->fp[i] != file) + continue; + + unix_notinflight(fp->user, fp->fp[i]); + left = fp->count - 1 - i; + if (left) { + memmove(&fp->fp[i], &fp->fp[i + 1], + left * sizeof(struct file *)); + } + fp->count--; + if (!fp->count) { + kfree_skb(skb); + skb = NULL; + } else { + __skb_queue_tail(&list, skb); + } + fput(file); + file = NULL; + break; + } + + if (!file) + break; + + __skb_queue_tail(&list, skb); + + skb = skb_dequeue(head); + } + + if (skb_peek(&list)) { + spin_lock_irq(&head->lock); + while ((skb = __skb_dequeue(&list)) != NULL) + __skb_queue_tail(head, skb); + spin_unlock_irq(&head->lock); + } +#else fput(file); +#endif } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -848,12 +986,21 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; /* - * Don't allow io_uring instances to be registered. + * Don't allow io_uring instances to be registered. If UNIX + * isn't enabled, then this causes a reference cycle and this + * instance can never get freed. If UNIX is enabled we'll + * handle it just fine, but there's still no point in allowing + * a ring fd as it doesn't support regular read/write anyway. */ if (io_is_uring_fops(file)) { fput(file); goto fail; } + ret = io_scm_file_account(ctx, file); + if (ret) { + fput(file); + goto fail; + } file_slot = io_fixed_file_slot(&ctx->file_table, i); io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 85f145607c620..acaf8dad05401 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,6 +77,21 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags); +int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); + +static inline bool io_file_need_scm(struct file *filp) +{ + return false; +} + +static inline int io_scm_file_account(struct io_ring_ctx *ctx, + struct file *file) +{ + if (likely(!io_file_need_scm(file))) + return 0; + return __io_scm_file_account(ctx, file); +} + int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args); int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, -- GitLab From 2b84f5edda9e2c08c4e2c37dc2b0d7692c341a99 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 May 2024 10:34:23 +0000 Subject: [PATCH 1415/1418] Revert "media: mc: Expand MUST_CONNECT flag to always require an enabled link" This reverts commit e2c545b841a7ae2c1796c9968fcf47166075cfb1 which is commit b3decc5ce7d778224d266423b542326ad469cb5f upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I94f10b3fe86210799b5697259e32114f00f080f0 Signed-off-by: Greg Kroah-Hartman --- .../media/mediactl/media-types.rst | 11 ++-- drivers/media/mc/mc-entity.c | 53 ++++--------------- 2 files changed, 16 insertions(+), 48 deletions(-) diff --git a/Documentation/userspace-api/media/mediactl/media-types.rst b/Documentation/userspace-api/media/mediactl/media-types.rst index 6332e8395263b..0ffeece1e0c8e 100644 --- a/Documentation/userspace-api/media/mediactl/media-types.rst +++ b/Documentation/userspace-api/media/mediactl/media-types.rst @@ -375,11 +375,12 @@ Types and flags used to represent the media graph elements are origins of links. * - ``MEDIA_PAD_FL_MUST_CONNECT`` - - If this flag is set, then for this pad to be able to stream, it must - be connected by at least one enabled link. There could be temporary - reasons (e.g. device configuration dependent) for the pad to need - enabled links even when this flag isn't set; the absence of the flag - doesn't imply there is none. + - If this flag is set and the pad is linked to any other pad, then + at least one of those links must be enabled for the entity to be + able to stream. There could be temporary reasons (e.g. device + configuration dependent) for the pad to need enabled links even + when this flag isn't set; the absence of the flag doesn't imply + there is none. One and only one of ``MEDIA_PAD_FL_SINK`` and ``MEDIA_PAD_FL_SOURCE`` diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 8919df09e3e8d..50b68b4dde5d0 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -509,15 +509,14 @@ static int media_pipeline_walk_push(struct media_pipeline_walk *walk, /* * Move the top entry link cursor to the next link. If all links of the entry - * have been visited, pop the entry itself. Return true if the entry has been - * popped. + * have been visited, pop the entry itself. */ -static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) +static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry; if (WARN_ON(walk->stack.top < 0)) - return false; + return; entry = media_pipeline_walk_top(walk); @@ -527,7 +526,7 @@ static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) walk->stack.top); walk->stack.top--; - return true; + return; } entry->links = entry->links->next; @@ -535,8 +534,6 @@ static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) dev_dbg(walk->mdev->dev, "media pipeline: moved entry %u to next link\n", walk->stack.top); - - return false; } /* Free all memory allocated while walking the pipeline. */ @@ -586,12 +583,11 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_link *link; struct media_pad *local; struct media_pad *remote; - bool last_link; int ret; origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); - last_link = media_pipeline_walk_pop(walk); + media_pipeline_walk_pop(walk); dev_dbg(walk->mdev->dev, "media pipeline: exploring link '%s':%u -> '%s':%u\n", @@ -616,7 +612,7 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); - goto done; + return 0; } /* @@ -631,44 +627,13 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (disabled)\n"); - goto done; + return 0; } ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; -done: - /* - * If we're done iterating over links, iterate over pads of the entity. - * This is necessary to discover pads that are not connected with any - * link. Those are dead ends from a pipeline exploration point of view, - * but are still part of the pipeline and need to be added to enable - * proper validation. - */ - if (!last_link) - return 0; - - dev_dbg(walk->mdev->dev, - "media pipeline: adding unconnected pads of '%s'\n", - local->entity->name); - - media_entity_for_each_pad(origin->entity, local) { - /* - * Skip the origin pad (already handled), pad that have links - * (already discovered through iterating over links) and pads - * not internally connected. - */ - if (origin == local || !local->num_links || - !media_entity_has_pad_interdep(origin->entity, origin->index, - local->index)) - continue; - - ret = media_pipeline_add_pad(pipe, walk, local); - if (ret) - return ret; - } - return 0; } @@ -780,6 +745,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pad *pad = ppad->pad; struct media_entity *entity = pad->entity; bool has_enabled_link = false; + bool has_link = false; struct media_link *link; dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name, @@ -809,6 +775,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad, /* Record if the pad has links and enabled links. */ if (link->flags & MEDIA_LNK_FL_ENABLED) has_enabled_link = true; + has_link = true; /* * Validate the link if it's enabled and has the @@ -846,7 +813,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad, * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set, * ensure that it has either no link or an enabled link. */ - if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && + if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && has_link && !has_enabled_link) { dev_dbg(mdev->dev, "Pad '%s':%u must be connected by an enabled link\n", -- GitLab From 501c229a8a84b4f31084120cffd0d431ed90f1f0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 May 2024 10:31:24 +0000 Subject: [PATCH 1416/1418] Revert "media: mc: Add num_links flag to media_pad" This reverts commit cff51913c5cc42b45ea010e91eb0c1e349d4292f which is commit baeddf94aa61879b118f2faa37ed126d772670cc upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I5b874c8b01bdd8cdeed6dec216fdad500593f5a7 Signed-off-by: Greg Kroah-Hartman --- drivers/media/mc/mc-entity.c | 6 ------ include/media/media-entity.h | 2 -- 2 files changed, 8 deletions(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 50b68b4dde5d0..030ef3d71b948 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -958,9 +958,6 @@ static void __media_entity_remove_link(struct media_entity *entity, /* Remove the reverse links for a data link. */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) { - link->source->num_links--; - link->sink->num_links--; - if (link->source->entity == entity) remote = link->sink->entity; else @@ -1072,9 +1069,6 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, sink->num_links++; source->num_links++; - link->source->num_links++; - link->sink->num_links++; - return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 03bb0963942bd..28c9de8a1f348 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -205,7 +205,6 @@ enum media_pad_signal_type { * @graph_obj: Embedded structure containing the media object common data * @entity: Entity this pad belongs to * @index: Pad index in the entity pads array, numbered from 0 to n - * @num_links: Number of links connected to this pad * @sig_type: Type of the signal inside a media pad * @flags: Pad flags, as defined in * :ref:`include/uapi/linux/media.h ` @@ -217,7 +216,6 @@ struct media_pad { struct media_gobj graph_obj; /* must be first field in struct */ struct media_entity *entity; u16 index; - u16 num_links; enum media_pad_signal_type sig_type; unsigned long flags; -- GitLab From 5f29666f6994b32097889bbdbd26e40ecf9a7954 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Apr 2024 15:30:52 +0000 Subject: [PATCH 1417/1418] Revert "timers: Rename del_timer_sync() to timer_delete_sync()" This reverts commit 113d5341ee1299aff7a6252cb19af52160ba22e8 which is commit 9b13df3fb64ee95e2397585404e442afee2c7d4f upstream. It breaks the Android kernel abi by turning del_timer_sync() into an inline function, which breaks the abi. Fix this by putting it back as needed AND fix up the only use of this new function in drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c which is what caused this commit to be backported to 5.4.274 in the first place. Bug: 161946584 Change-Id: Icd26c7c81e6172f36eeeb69827989bfab1d32afe Signed-off-by: Greg Kroah-Hartman --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- include/linux/timer.h | 15 +-------------- kernel/time/timer.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1fa7bf5eeff08..a264d98684ec4 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -791,7 +791,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, scan_request = cfg->scan_request; cfg->scan_request = NULL; - timer_delete_sync(&cfg->escan_timeout); + del_timer_sync(&cfg->escan_timeout); if (fw_abort) { /* Do a scan abort to stop the driver's scan engine */ diff --git a/include/linux/timer.h b/include/linux/timer.h index f716fa30cc64c..effa3803bf31c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -187,20 +187,7 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -extern int timer_delete_sync(struct timer_list *timer); - -/** - * del_timer_sync - Delete a pending timer and wait for a running callback - * @timer: The timer to be deleted - * - * See timer_delete_sync() for detailed explanation. - * - * Do not use in new code. Use timer_delete_sync() instead. - */ -static inline int del_timer_sync(struct timer_list *timer) -{ - return timer_delete_sync(timer); -} +extern int del_timer_sync(struct timer_list *timer); #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 4132285a9e806..90e3fa492ce7c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1089,7 +1089,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, - * otherwise timer_delete_sync() can't detect that the timer's + * otherwise del_timer_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. */ @@ -1265,7 +1265,7 @@ EXPORT_SYMBOL_GPL(add_timer_on); * @timer: The timer to be deactivated * * The function only deactivates a pending timer, but contrary to - * timer_delete_sync() it does not take into account whether the timer's + * del_timer_sync() it does not take into account whether the timer's * callback function is concurrently executed on a different CPU or not. * It neither prevents rearming of the timer. If @timer can be rearmed * concurrently then the return value of this function is meaningless. @@ -1401,7 +1401,7 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } #endif /** - * timer_delete_sync - Deactivate a timer and wait for the handler to finish. + * del_timer_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, @@ -1423,10 +1423,10 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * spin_lock_irq(somelock); * * spin_lock(somelock); - * timer_delete_sync(mytimer); + * del_timer_sync(mytimer); * while (base->running_timer == mytimer); * - * Now timer_delete_sync() will never return and never release somelock. + * Now del_timer_sync() will never return and never release somelock. * The interrupt on the other CPU is waiting to grab somelock but it has * interrupted the softirq that CPU0 is waiting to finish. * @@ -1439,7 +1439,7 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ -int timer_delete_sync(struct timer_list *timer) +int del_timer_sync(struct timer_list *timer) { int ret; @@ -1479,7 +1479,7 @@ int timer_delete_sync(struct timer_list *timer) return ret; } -EXPORT_SYMBOL(timer_delete_sync); +EXPORT_SYMBOL(del_timer_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), @@ -1501,8 +1501,8 @@ static void call_timer_fn(struct timer_list *timer, #endif /* * Couple the lock chain with the lock chain at - * timer_delete_sync() by acquiring the lock_map around the fn() - * call here and in timer_delete_sync(). + * del_timer_sync() by acquiring the lock_map around the fn() + * call here and in del_timer_sync(). */ lock_map_acquire(&lockdep_map); -- GitLab From b98ce0fe28ce79642f8b191a448e4ecf0356cad0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 May 2024 16:00:28 +0000 Subject: [PATCH 1418/1418] ANDROID: GKI: update the abi for tracing changes in 6.1.84 In 6.1.84, a number of internal tracing structures changed. Those structures are not used outside of the core kernel, but due to opaque pointers being carried into some abi signatures, they are tracked by the .stg file. Update the .stg file to handle these changes, as they are safe to modify at this point in time. The changes are: INFO: ABI DIFFERENCES HAVE BEEN DETECTED! INFO: type 'struct trace_buffer' changed byte size changed from 224 to 216 2 members ('bool time_stamp_abs' .. 'struct ring_buffer_ext_cb* ext_cb') changed offset changed by -64 type 'struct ring_buffer_per_cpu' changed byte size changed from 496 to 488 type 'struct rb_irq_work' changed byte size changed from 96 to 88 member 'long wait_index' was removed 3 members ('bool waiters_pending' .. 'bool wakeup_full') changed offset changed by -64 Fixes: 347385861c50 ("Linux 6.1.84") Signed-off-by: Greg Kroah-Hartman Change-Id: Id0c90b04188335ffa9a40db0397ed5a12080ca95 --- android/abi_gki_aarch64.stg | 43 ++++++++++------------- android/abi_gki_protected_exports_aarch64 | 1 + 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 11d5eeb2c32b0..b68a1f05e090e 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -86586,10 +86586,10 @@ member { offset: 58176 } member { - id: 0x58d13e53 + id: 0x58d13a5a name: "ext_cb" type_id: 0x3fefb42f - offset: 1728 + offset: 1664 } member { id: 0x50922243 @@ -95083,10 +95083,10 @@ member { offset: 448 } member { - id: 0xabba6f98 + id: 0xabba6185 name: "full_waiters_pending" type_id: 0x6d7f5ff6 - offset: 712 + offset: 648 } member { id: 0xf667dcee @@ -192851,10 +192851,10 @@ member { type_id: 0x92233392 } member { - id: 0xfeab2cc0 + id: 0xfeab2654 name: "time_stamp_abs" type_id: 0x6d7f5ff6 - offset: 1664 + offset: 1600 } member { id: 0xd43e1787 @@ -206871,12 +206871,6 @@ member { type_id: 0x74d29cf1 offset: 2880 } -member { - id: 0x7ab3f5b1 - name: "wait_index" - type_id: 0xfc0e1dbd - offset: 640 -} member { id: 0x7ab3febe name: "wait_index" @@ -207032,10 +207026,10 @@ member { offset: 256 } member { - id: 0xc133acab + id: 0xc133a8ef name: "waiters_pending" type_id: 0x6d7f5ff6 - offset: 704 + offset: 640 } member { id: 0x3e50f6f4 @@ -207316,10 +207310,10 @@ member { type_id: 0xe62ebf07 } member { - id: 0x3766071b + id: 0x37660e3b name: "wakeup_full" type_id: 0x6d7f5ff6 - offset: 720 + offset: 656 } member { id: 0xf5741316 @@ -252202,14 +252196,13 @@ struct_union { kind: STRUCT name: "rb_irq_work" definition { - bytesize: 96 + bytesize: 88 member_id: 0xd64000c8 member_id: 0x939cab56 member_id: 0x7f337099 - member_id: 0x7ab3f5b1 - member_id: 0xc133acab - member_id: 0xabba6f98 - member_id: 0x3766071b + member_id: 0xc133a8ef + member_id: 0xabba6185 + member_id: 0x37660e3b } } struct_union { @@ -253848,7 +253841,7 @@ struct_union { kind: STRUCT name: "ring_buffer_per_cpu" definition { - bytesize: 496 + bytesize: 488 member_id: 0x5161b0e6 member_id: 0x9fc7ddba member_id: 0xd712613b @@ -263139,7 +263132,7 @@ struct_union { kind: STRUCT name: "trace_buffer" definition { - bytesize: 224 + bytesize: 216 member_id: 0x2d2d0138 member_id: 0xe7102ce7 member_id: 0x9fc7d460 @@ -263151,8 +263144,8 @@ struct_union { member_id: 0x0f3caba2 member_id: 0x45d166c5 member_id: 0xedbbc0ae - member_id: 0xfeab2cc0 - member_id: 0x58d13e53 + member_id: 0xfeab2654 + member_id: 0x58d13a5a } } struct_union { diff --git a/android/abi_gki_protected_exports_aarch64 b/android/abi_gki_protected_exports_aarch64 index 7d97572e6175b..1f9b851b39475 100644 --- a/android/abi_gki_protected_exports_aarch64 +++ b/android/abi_gki_protected_exports_aarch64 @@ -96,6 +96,7 @@ hci_alloc_dev_priv hci_cmd_sync hci_cmd_sync_cancel hci_cmd_sync_queue +hci_cmd_sync_submit hci_conn_check_secure hci_conn_security hci_conn_switch_role -- GitLab