diff --git a/.mailmap b/.mailmap index cbdf5d63c381dcfe3afadb7c86abf804d9a93053..8ee01d9d70464bc0012d1bf87ca8fcef7ccd7545 100644 --- a/.mailmap +++ b/.mailmap @@ -166,6 +166,7 @@ Daniel Borkmann Daniel Borkmann David Brownell David Collins +David Heidelberg David Rheinsberg David Rheinsberg David Rheinsberg @@ -260,6 +261,7 @@ Jaegeuk Kim Jakub Kicinski James Bottomley James Bottomley +James Clark James E Wilson James Hogan James Hogan diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index c61f9b8139730cbe79466cfec81b82021ca26775..12488c14be64a089f2b8d9477fe8308e09da338b 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -14,9 +14,10 @@ Description: event to its internal Informational Event log, updates the Event Status register, and if configured, interrupts the host. It is not an error to inject poison into an address that - already has poison present and no error is returned. The - inject_poison attribute is only visible for devices supporting - the capability. + already has poison present and no error is returned. If the + device returns 'Inject Poison Limit Reached' an -EBUSY error + is returned to the user. The inject_poison attribute is only + visible for devices supporting the capability. What: /sys/kernel/debug/memX/clear_poison diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu index 307a55f599cb1716f229fd95651aadcd30ad24c3..35a8f6dae5bfdd35fb0e61131d9f63d97d8a4c50 100644 --- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu @@ -32,9 +32,9 @@ Description: (RW) The front button on the Turris Omnia router can be interrupt. This file switches between these two modes: - - "mcu" makes the button press event be handled by the MCU to - change the LEDs panel intensity. - - "cpu" makes the button press event be handled by the CPU. + - ``mcu`` makes the button press event be handled by the MCU to + change the LEDs panel intensity. + - ``cpu`` makes the button press event be handled by the CPU. Format: %s. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 325873385b71fbf7830c917087f6fa1ff75044ec..de725ca3be825ba0e289afbf6d44d97f3760a477 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -562,7 +562,8 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= If control status is "forceoff" or "notsupported" writes - are rejected. + are rejected. Note that enabling SMT on PowerPC skips + offline cores. What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias Date: March 2019 diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index fd4b56c0996f47d6f11c93db442eabca3f252264..c09674a75a9e3c712339b3dd3b830397dadf4c84 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -742,7 +742,7 @@ SecurityFlags Flags which control security negotiation and may use NTLMSSP 0x00080 must use NTLMSSP 0x80080 seal (packet encryption) 0x00040 - must seal (not implemented yet) 0x40040 + must seal 0x40040 cifsFYI If set to non-zero value, additional debug information will be logged to the system error log. This field diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index e625830d335ea70c7692ec7005e524900e729f36..552c9155165d7c89920f5bbd8d38a02e01386822 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -162,13 +162,14 @@ iv_large_sectors Module parameters:: -max_read_size -max_write_size - Maximum size of read or write requests. When a request larger than this size - is received, dm-crypt will split the request. The splitting improves - concurrency (the split requests could be encrypted in parallel by multiple - cores), but it also causes overhead. The user should tune these parameters to - fit the actual workload. + + max_read_size + max_write_size + Maximum size of read or write requests. When a request larger than this size + is received, dm-crypt will split the request. The splitting improves + concurrency (the split requests could be encrypted in parallel by multiple + cores), but it also causes overhead. The user should tune these parameters to + fit the actual workload. Example scripts diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c1134ad5f06d13f0b3e68e63cb6331fc893cd7fe..09126bb8cc9ffb140280c56895754a0e2c602f25 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3830,9 +3830,6 @@ noalign [KNL,ARM] - noaltinstr [S390,EARLY] Disables alternative instructions - patching (CPU alternatives feature). - noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any IOAPICs that may be present in the system. @@ -4801,11 +4798,9 @@ profile= [KNL] Enable kernel profiling via /proc/profile Format: [,] - Param: : "schedule", "sleep", or "kvm" + Param: : "schedule" or "kvm" [defaults to kernel profiling] Param: "schedule" - profile schedule points. - Param: "sleep" - profile D-state sleeping (millisecs). - Requires CONFIG_SCHEDSTATS Param: "kvm" - profile VM exits. Param: - step/bucket size as a power of 2 for statistical time based profiling. diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index bb83c5d8c6755149d5317c7eeba5f3ced9dc6014..50327c05be8d1b77fc7af27c695dbb97740cbca1 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -122,10 +122,18 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1490853 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1491015 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | @@ -138,8 +146,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X1 | #1502854 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | @@ -160,6 +174,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | @@ -170,6 +186,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V1 | #1619801 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 02eb4d98b7deabd0b58f4a1dd6eab7af9ed87462..85b709257918ade7590ebbf2f0f53dcdf8ed36be 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -239,28 +239,38 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance - information about the selected set of processors. +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was + mistakenly classified as a bitmask rather than a value. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned - accesses is unknown. +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing + the performance of misaligned scalar native word accesses on the selected set + of processors. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are - emulated via software, either in or below the kernel. These accesses are - always extremely slow. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of + misaligned scalar accesses is unknown. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower - than equivalent byte accesses. Misaligned accesses may be supported - directly in hardware, or trapped and emulated by software. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar + accesses are emulated via software, either in or below the kernel. These + accesses are always extremely slow. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster - than equivalent byte accesses. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native + word sized accesses are slower than the equivalent quantity of byte + accesses. Misaligned accesses may be supported directly in hardware, or + trapped and emulated by software. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are - not supported at all and will generate a misaligned address fault. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native + word sized accesses are faster than the equivalent quantity of byte + accesses. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar + accesses are not supported at all and will generate a misaligned address + fault. * :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which represents the size of the Zicboz block in bytes. * :c:macro:`RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS`: An unsigned long which represent the highest userspace virtual address usable. + +* :c:macro:`RISCV_HWPROBE_KEY_TIME_CSR_FREQ`: Frequency (in Hz) of `time CSR`. diff --git a/Documentation/arch/riscv/vm-layout.rst b/Documentation/arch/riscv/vm-layout.rst index e476b4386bd9dba3c3f59b501369e169fecc003a..077b968dcc81057af43147598f8b10d5488d9398 100644 --- a/Documentation/arch/riscv/vm-layout.rst +++ b/Documentation/arch/riscv/vm-layout.rst @@ -47,11 +47,12 @@ RISC-V Linux Kernel SV39 | Kernel-space virtual memory, shared between all processes: ____________________________________________________________|___________________________________________________________ | | | | - ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap - ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io - ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap - ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space - ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory + ffffffc4fea00000 | -236 GB | ffffffc4feffffff | 6 MB | fixmap + ffffffc4ff000000 | -236 GB | ffffffc4ffffffff | 16 MB | PCI io + ffffffc500000000 | -236 GB | ffffffc5ffffffff | 4 GB | vmemmap + ffffffc600000000 | -232 GB | ffffffd5ffffffff | 64 GB | vmalloc/ioremap space + ffffffd600000000 | -168 GB | fffffff5ffffffff | 128 GB | direct mapping of all physical memory + | | | | fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan __________________|____________|__________________|_________|____________________________________________________________ | diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml index c960c8e0a9a59193dc288dd0b778cce633bf43de..08b89b62c505b94fc8d099b34c8c9bfe7946d87e 100644 --- a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml +++ b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml @@ -30,7 +30,7 @@ description: | maintainers: - Mike Leach - Suzuki K Poulose - - James Clark + - James Clark - Mao Jinlong - Hao Zhang diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml index 6745b4cc8f1caaa98bf4d4c722dbb0c8d46df6b3..d50a60368e279a3ec52a77635851ca11d2ac5582 100644 --- a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml +++ b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml @@ -29,7 +29,7 @@ description: | maintainers: - Mike Leach - Suzuki K Poulose - - James Clark + - James Clark - Mao Jinlong - Hao Zhang diff --git a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml index b5e5767d86988d3a9b44501e960fd484509a3e82..13eaa8d9a16e5a4bd43b3e184f9277494acf27a1 100644 --- a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml @@ -35,6 +35,9 @@ properties: ports-implemented: const: 1 + power-domains: + maxItems: 1 + sata-port@0: $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml index a584b4953e686d8ad8fdea7813aa03bcd4505978..46403b98411f81d6d136d8c9e5602095b72e0e33 100644 --- a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml index 6b9c1d198b14d04c8999c0f6abb4ac36dae0e5d4..10afe984e2fbc1fd190b6cb901b0f1f5df43157f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on MSM8994 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml index a5a29dc75ae11b99681708a6cee6bca9c32bba4c..1fe68e07a2b208b7caffe181328ddc26e09cd20e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml index 2280b859b2ad6392e7d4912bf76731b665aa58e4..78e232fa95dc603b58848b45ffd2fc79b32577c0 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml index cf19f44af77440920371975d36ae8a3f427be928..4ff17a91344badaf572b15f657f0c740ea1859b7 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6115 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml index 374a1844a159a0e53752ef351bd2d5aaaf18bf05..10a9c96a97b6a1390736f701488311670017d911 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks and power domains on diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml index fd6658cb793dbb32526f7208f29a7cde9fa027b2..c03b30f64f359abd03e61a543522c81ce542e274 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Camera Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm camera clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml index 183b1c75dbdf3b1401f00f050158787c68669712..3cd422a645fd876b9f834a6f8389160f5aab4bf2 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml index 147b75a21508f1e6431c816b4f1d0827dc612a0b..de4e9066eeb835fd8215c40fed58c4b2baa28504 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml index cf4cad76f6c9517ac244d72297c90cd747a8b24e..d9dd479c17bd63956a43055e007c3a612ef5b569 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml index 46d1d91e3a01ef2d106f3b4986b5f9a4fb12d2c7..5c2ecec0624e35bde493ee7b39dbe8afbf295e2f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM8350 Video Clock & Reset Controller maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm video clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml index 3c2cac14e6c3a0ae676ad160e363a90893d5678e..d10bb002906e9f55f1f1dedc570f5e835181a504 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM8450 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml index 8e8a288d318c34d8247abe0705f81c175fb15d99..e22b4c433fd07c2d92faefbdcaa5719c021e9041 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM6375 Display MDSS maintainers: - - Konrad Dybcio + - Konrad Dybcio description: SM6375 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml index 2399cabf044c2fdde7b17fea546b69d095559d69..dd614e077bbff3d2d52dddc5657c0bc40d8e49a4 100644 --- a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml +++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ASUS Z00T TM5P5 NT35596 5.5" 1080×1920 LCD Panel maintainers: - - Konrad Dybcio + - Konrad Dybcio description: |+ This panel seems to only be found in the Asus Z00T diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 765ca155c83a68c044f93f68a4b00d6257e4437d..032f783eefc4508df35da10e53ca20ff8b1b9bdf 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -14,7 +14,16 @@ allOf: properties: compatible: - const: samsung,atna33xc20 + oneOf: + # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel + - const: samsung,atna33xc20 + - items: + - enum: + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel + - samsung,atna45af01 + # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel + - samsung,atna45dc02 + - const: samsung,atna33xc20 enable-gpios: true port: true diff --git a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml index 191b692125e145748003a800ee663e0c09d9e3cc..032a989184ff057cb28116ef2e2fcf001cfa2f40 100644 --- a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml +++ b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sony TD4353 JDI 5 / 5.7" 2160x1080 MIPI-DSI Panel maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | The Sony TD4353 JDI is a 5 (XZ2c) / 5.7 (XZ2) inch 2160x1080 diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 1715b0c9feeafe9153d30a814135385201446767..c31e5e719525015ad4f06a3f7164a0b1e874901c 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -28,6 +28,7 @@ properties: - anvo,anv32e61w - atmel,at25256B - fujitsu,mb85rs1mt + - fujitsu,mb85rs256 - fujitsu,mb85rs64 - microchip,at25160bn - microchip,25lc040 diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index 113957ebe9f1df4a12928d66c04bbb1cf3defc78..e25fa72fd7857767bbecc21977a5ef02c9ce3437 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -91,6 +91,7 @@ patternProperties: - const: 0 - description: | Shall encode the I3C LVR (Legacy Virtual Register): + See include/dt-bindings/i3c/i3c.h bit[31:8]: unused/ignored bit[7:5]: I2C device index. Possible values: * 0: I2C device has a 50 ns spike filter @@ -153,6 +154,8 @@ additionalProperties: true examples: - | + #include + i3c@d040000 { compatible = "cdns,i3c-master"; clocks = <&coreclock>, <&i3csysclock>; @@ -166,7 +169,7 @@ examples: /* I2C device. */ eeprom@57 { compatible = "atmel,24c01"; - reg = <0x57 0x0 0x10>; + reg = <0x57 0x0 (I2C_FM | I2C_FILTER)>; pagesize = <0x8>; }; diff --git a/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml b/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml index c0e805e531be231817bac96206b4e0624b4881bf..4fc13e3c0f75e7e9a63aff26c35b6593af59491a 100644 --- a/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml +++ b/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml @@ -20,7 +20,16 @@ properties: maxItems: 1 clocks: - maxItems: 1 + minItems: 1 + items: + - description: Core clock + - description: APB clock + + clock-names: + minItems: 1 + items: + - const: core + - const: apb interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml index a03da9489ed916621918c3f370c56a0721f70a41..190889c7b62ae287875264b088946ba12ccda100 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml @@ -120,9 +120,8 @@ patternProperties: description: Positive input can be connected to pins AIN1 to AIN16 by choosing the appropriate value from 1 to 16. Negative input is connected to AINCOM. - items: - minimum: 1 - maximum: 16 + minimum: 1 + maximum: 16 oneOf: - required: diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml index 9fce7203bd42dbb57f0bf5d380b70d463ab1aaf6..78210791496f84c49989d6cb99a328ad997025b1 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC7280 maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml index 6c2da03f0cd2241bce21c62d5517ab80c4793f14..100c686369092687e78f7c039135d63b5a02343b 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC8280XP maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml index 3cff7e66225538ff593fe31202f4561631a6b850..300640a533dd67e59aa9d16f06500c152b1510ef 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SM8450 maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index 571e5746d1776f5b2ce0c0a90632a627fb4e75ab..f8cebc9e8cd9d46b449cd297153dbebe5c84bf3f 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies legacy IOMMU implementations maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm "B" family devices which are not compatible with arm-smmu have diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index a1b71b35319e70f0b1a12a4c84fa1d95c7b08d76..42f9843d1868ac1177589e396527847e4b04e567 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -38,6 +38,10 @@ properties: managed: true + phys: + description: A reference to the SerDes lane(s) + maxItems: 1 + required: - reg diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml index bd3cbb44c99a4638e9b9aa57c95c8b3def7171e6..e75393b3d196c58f13c7c2f0eb47ac5ab69e250a 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. MDM9607 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm MDM9607 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml index a4771f87d93645549d9cfbb8b39315065d0b8cf6..b262af6be97da3207e2711a1367e400b2a25adae 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6350 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm SM6350 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml index 047f82863f9bbfdfcd870a35656d0b56e6c018ba..c11af09c3f5b89e19e82d6f77cd72a69dc3a6afd 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6375 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm SM6375 SoC. diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml index 7afafde17a38bfcb41abfb960841b054abf9faa7..61cf4fe19ca53ebe33397ccbdcf29b936476eef0 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml @@ -8,7 +8,7 @@ title: Qualcomm Resource Power Manager (RPM) Processor/Subsystem maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio - Stephan Gerhold description: | diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml index 9410404f87f1afb864c79614ada2659745a37ae9..ad2dcc39a5f5480f31057275d71c76e5ab864890 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. (QTI) RPM Master Stats maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | The Qualcomm RPM (Resource Power Manager) architecture includes a concept diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml index beb0ff0245b0ab84477b282243d9ce43c633c078..a65b1d1d5fdd9c50de429998f0d23c2fbbffa369 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml @@ -199,10 +199,11 @@ additionalProperties: false examples: - | + #include codec@1,0{ compatible = "slim217,250"; reg = <1 0>; - reset-gpios = <&tlmm 64 0>; + reset-gpios = <&tlmm 64 GPIO_ACTIVE_LOW>; slim-ifc-dev = <&wcd9340_ifd>; #sound-dai-cells = <1>; interrupt-parent = <&tlmm>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml index de397d879acc2e6e95e740cd4a2139bc88ff4c98..f94203798f24b36e33ff18dd41056e682d034a6e 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml @@ -42,7 +42,7 @@ examples: pinctrl-names = "default", "sleep"; pinctrl-0 = <&wcd_reset_n>; pinctrl-1 = <&wcd_reset_n_sleep>; - reset-gpios = <&tlmm 83 GPIO_ACTIVE_HIGH>; + reset-gpios = <&tlmm 83 GPIO_ACTIVE_LOW>; vdd-buck-supply = <&vreg_l17b_1p8>; vdd-rxtx-supply = <&vreg_l18b_1p8>; vdd-px-supply = <&vreg_l18b_1p8>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml index cf6c3787adfeff846a13391775fa188c15d17e33..10531350c3362788d1943100808ff9cf7d7036cc 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml @@ -34,9 +34,10 @@ unevaluatedProperties: false examples: - | + #include codec { compatible = "qcom,wcd9380-codec"; - reset-gpios = <&tlmm 32 0>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd938x_tx>; qcom,rx-device = <&wcd938x_rx>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml index 6e76f6a8634f0761c67e2bd3f57268dcb347e66e..c69291f4d575ca5b4fc037ac4d59c91f0df74cd1 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml @@ -52,10 +52,10 @@ unevaluatedProperties: false examples: - | - #include + #include codec { compatible = "qcom,wcd9390-codec"; - reset-gpios = <&tlmm 32 IRQ_TYPE_NONE>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd939x_tx>; qcom,rx-device = <&wcd939x_rx>; diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 5d3dc952770da4005fbba1f9a2be8759f1d95932..7913ca9b6b54020c58e387b3618922386ce03763 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -328,7 +328,9 @@ properties: - renesas,hs3001 # Renesas ISL29501 time-of-flight sensor - renesas,isl29501 - # Rohm DH2228FV + # Rohm BH2228FV 8 channel DAC + - rohm,bh2228fv + # Rohm DH2228FV - This device does not exist, use rohm,bh2228fv instead. - rohm,dh2228fv # S524AD0XF1 (128K/256K-bit Serial EEPROM for Low Power) - samsung,24ad0xd1 diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index 783c27591e564c7b8020d0be5c603f557661cda7..245e8c3ce6699342fd610c15df5806f2e71a4c13 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -18,6 +18,7 @@ properties: - usb424,2412 - usb424,2417 - usb424,2514 + - usb424,2517 reg: true diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml index 69845ec32e818bbd9ba43048c7a84fcafcbd49e0..d0eff1ea52b4f166de3b198daaa929551fd1375f 100644 --- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml @@ -21,6 +21,7 @@ properties: - amlogic,t7-wdt - items: - enum: + - amlogic,a4-wdt - amlogic,c3-wdt - amlogic,s4-wdt - const: amlogic,t7-wdt diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst index 036e49553542b332d49921305fdd2f17c491e5a1..12b82725d322940824710ed5a85b913c03d192ab 100644 --- a/Documentation/driver-api/cxl/index.rst +++ b/Documentation/driver-api/cxl/index.rst @@ -9,4 +9,6 @@ Compute Express Link memory-devices + maturity-map + .. only:: subproject and html diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst new file mode 100644 index 0000000000000000000000000000000000000000..df8e2ac2a320c9047244854c8817905d5adab405 --- /dev/null +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -0,0 +1,202 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +=========================================== +Compute Express Link Subsystem Maturity Map +=========================================== + +The Linux CXL subsystem tracks the dynamic `CXL specification +`_ that +continues to respond to new use cases with new features, capability +updates and fixes. At any given point some aspects of the subsystem are +more mature than others. While the periodic pull requests summarize the +`work being incorporated each merge window +`_, +those do not always convey progress relative to a starting point and a +future end goal. + +What follows is a coarse breakdown of the subsystem's major +responsibilities along with a maturity score. The expectation is that +the change-history of this document provides an overview summary of the +subsystem maturation over time. + +The maturity scores are: + +- [3] Mature: Work in this area is complete and no changes on the horizon. + Note that this score can regress from one kernel release to the next + based on new test results or end user reports. + +- [2] Stabilizing: Major functionality operational, common cases are + mature, but known corner cases are still a work in progress. + +- [1] Initial: Capability that has exited the Proof of Concept phase, but + may still have significant gaps to close and fixes to apply as real + world testing occurs. + +- [0] Known gap: Feature is on a medium to long term horizon to + implement. If the specification has a feature that does not even have + a '0' score in this document, there is a good chance that no one in + the linux-cxl@vger.kernel.org community has started to look at it. + +- X: Out of scope for kernel enabling, or kernel enabling not required + +Feature and Capabilities +======================== + +Enumeration / Provisioning +-------------------------- +All of the fundamental enumeration an object model of the subsystem is +in place, but there are several corner cases that are pending closure. + + +* [2] CXL Window Enumeration + + * [0] :ref:`Extended-linear memory-side cache ` + * [0] Low Memory-hole + * [0] Hetero-interleave + +* [2] Switch Enumeration + + * [0] CXL register enumeration link-up dependency + +* [2] HDM Decoder Configuration + + * [0] Decoder target and granularity constraints + +* [2] Performance enumeration + + * [3] Endpoint CDAT + * [3] Switch CDAT + * [1] CDAT to Core-mm integration + + * [1] x86 + * [0] Arm64 + * [0] All other arch. + + * [0] Shared link + +* [2] Hotplug + (see CXL Window Enumeration) + + * [0] Handle Soft Reserved conflicts + +* [0] :ref:`RCH link status ` +* [0] Fabrics / G-FAM (chapter 7) +* [0] Global Access Endpoint + + +RAS +--- +In many ways CXL can be seen as a standardization of what would normally +be handled by custom EDAC drivers. The open development here is +mainly caused by the enumeration corner cases above. + +* [3] Component events (OS) +* [2] Component events (FFM) +* [1] Endpoint protocol errors (OS) +* [1] Endpoint protocol errors (FFM) +* [0] Switch protocol errors (OS) +* [1] Switch protocol errors (FFM) +* [2] DPA->HPA Address translation + + * [1] XOR Interleave translation + (see CXL Window Enumeration) + +* [1] Memory Failure coordination +* [0] Scrub control +* [2] ACPI error injection EINJ + + * [0] EINJ v2 + * [X] Compliance DOE + +* [2] Native error injection +* [3] RCH error handling +* [1] VH error handling +* [0] PPR +* [0] Sparing +* [0] Device built in test + + +Mailbox commands +---------------- + +* [3] Firmware update +* [3] Health / Alerts +* [1] :ref:`Background commands ` +* [3] Sanitization +* [3] Security commands +* [3] RAW Command Debug Passthrough +* [0] CEL-only-validation Passthrough +* [0] Switch CCI +* [3] Timestamp +* [1] PMEM labels +* [0] PMEM GPF / Dirty Shutdown +* [0] Scan Media + +PMU +--- +* [1] Type 3 PMU +* [0] Switch USP/ DSP, Root Port + +Security +-------- + +* [X] CXL Trusted Execution Environment Security Protocol (TSP) +* [X] CXL IDE (subsumed by TSP) + +Memory-pooling +-------------- + +* [1] Hotplug of LDs (via PCI hotplug) +* [0] Dynamic Capacity Device (DCD) Support + +Multi-host sharing +------------------ + +* [0] Hardware coherent shared memory +* [0] Software managed coherency shared memory + +Multi-host memory +----------------- + +* [0] Dynamic Capacity Device Support +* [0] Sharing + +Accelerator +----------- + +* [0] Accelerator memory enumeration HDM-D (CXL 1.1/2.0 Type-2) +* [0] Accelerator memory enumeration HDM-DB (CXL 3.0 Type-2) +* [0] CXL.cache 68b (CXL 2.0) +* [0] CXL.cache 256b Cache IDs (CXL 3.0) + +User Flow Support +----------------- + +* [0] HPA->DPA Address translation (need xormaps export solution) + +Details +======= + +.. _extended-linear: + +* **Extended-linear memory-side cache**: An HMAT proposal to enumerate the presence of a + memory-side cache where the cache capacity extends the SRAT address + range capacity. `See the ECN + `_ + for more details: + +.. _rch-link-status: + +* **RCH Link Status**: RCH (Restricted CXL Host) topologies, end up + hiding some standard registers like PCIe Link Status / Capabilities in + the CXL RCRB (Root Complex Register Block). + +.. _background-commands: + +* **Background commands**: The CXL background command mechanism is + awkward as the single slot is monopolized potentially indefinitely by + various commands. A `cancel on conflict + `_ + facility is needed to make sure the kernel can ensure forward progress + of priority commands. diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index 6c1175c6afba99ce914325fb87cd71f56f643e07..978198f8a18bed3e1aa19d8f4009b524583feed4 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -4,8 +4,6 @@ Generic Thermal Sysfs driver How To Written by Sujith Thomas , Zhang Rui -Updated: 2 January 2008 - Copyright (c) 2008 Intel Corporation @@ -38,23 +36,23 @@ temperature) and throttle appropriate devices. :: - struct thermal_zone_device - *thermal_zone_device_register(char *type, - int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay)) + struct thermal_zone_device * + thermal_zone_device_register_with_trips(const char *type, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + unsigned int passive_delay, + unsigned int polling_delay) - This interface function adds a new thermal zone device (sensor) to + This interface function adds a new thermal zone device (sensor) to the /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the - thermal cooling devices registered at the same time. + thermal cooling devices registered to it at the same time. type: the thermal zone type. trips: - the total number of trip points this thermal zone supports. - mask: - Bit string: If 'n'th bit is set, then trip point 'n' is writable. + the table of trip points for this thermal zone. devdata: device private data ops: @@ -67,32 +65,29 @@ temperature) and throttle appropriate devices. .get_temp: get the current temperature of the thermal zone. .set_trips: - set the trip points window. Whenever the current temperature - is updated, the trip points immediately below and above the - current temperature are found. - .get_mode: - get the current mode (enabled/disabled) of the thermal zone. - - - "enabled" means the kernel thermal management is - enabled. - - "disabled" will prevent kernel thermal driver action - upon trip points so that user applications can take - charge of thermal management. - .set_mode: - set the mode (enabled/disabled) of the thermal zone. - .get_trip_type: - get the type of certain trip point. - .get_trip_temp: - get the temperature above which the certain trip point - will be fired. + set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. + .change_mode: + change the mode (enabled/disabled) of the thermal zone. + .set_trip_temp: + set the temperature of a given trip point. + .get_crit_temp: + get the critical temperature for this thermal zone. .set_emul_temp: - set the emulation temperature which helps in debugging - different threshold temperature points. + set the emulation temperature which helps in debugging + different threshold temperature points. + .get_trend: + get the trend of most recent zone temperature changes. + .hot: + hot trip point crossing handler. + .critical: + critical trip point crossing handler. tzp: thermal zone platform parameters. passive_delay: - number of milliseconds to wait between polls when - performing passive cooling. + number of milliseconds to wait between polls when performing passive + cooling. polling_delay: number of milliseconds to wait between polls when checking whether trip points have been crossed (0 for interrupt driven systems). diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst index a74d7b052dc1305a8adbcd374438327605e92fb8..de1f32526cc1c4c9c65af2bde0da200ec42c4d98 100644 --- a/Documentation/filesystems/caching/fscache.rst +++ b/Documentation/filesystems/caching/fscache.rst @@ -318,10 +318,10 @@ where the columns are: Debugging ========= -If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime -debugging enabled by adjusting the value in:: +If CONFIG_NETFS_DEBUG is enabled, the FS-Cache facility and NETFS support can +have runtime debugging enabled by adjusting the value in:: - /sys/module/fscache/parameters/debug + /sys/module/netfs/parameters/debug This is a bitmask of debugging streams to enable: @@ -343,6 +343,6 @@ This is a bitmask of debugging streams to enable: The appropriate set of values should be OR'd together and the result written to the control file. For example:: - echo $((1|8|512)) >/sys/module/fscache/parameters/debug + echo $((1|8|512)) >/sys/module/netfs/parameters/debug will turn on all function entry debugging. diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index cc4626d6ee4f836b50dcb8b3e85de56d01da968f..c293f8e37468cddacafb5d3edc615134408d6b3b 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -75,7 +75,7 @@ Here are the main features of EROFS: - Support merging tail-end data into a special inode as fragments. - - Support large folios for uncompressed files. + - Support large folios to make use of THPs (Transparent Hugepages); - Support direct I/O on uncompressed files to avoid double caching for loop devices; diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 6b30e43a0d11f49959d700e38fbfe115d71fd40c..67cb68ea6e6880db27cf4d192ec35cb3daf0bb1c 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -13,7 +13,7 @@ KSMBD architecture The subset of performance related operations belong in kernelspace and the other subset which belong to operations which are not really related with performance in userspace. So, DCE/RPC management that has historically resulted -into number of buffer overflow issues and dangerous security bugs and user +into a number of buffer overflow issues and dangerous security bugs and user account management are implemented in user space as ksmbd.mountd. File operations that are related with performance (open/read/write/close etc.) in kernel space (ksmbd). This also allows for easier integration with VFS @@ -24,8 +24,8 @@ ksmbd (kernel daemon) When the server daemon is started, It starts up a forker thread (ksmbd/interface name) at initialization time and open a dedicated port 445 -for listening to SMB requests. Whenever new clients make request, Forker -thread will accept the client connection and fork a new thread for dedicated +for listening to SMB requests. Whenever new clients make a request, the Forker +thread will accept the client connection and fork a new thread for a dedicated communication channel between the client and the server. It allows for parallel processing of SMB requests(commands) from clients as well as allowing for new clients to make new connections. Each instance is named ksmbd/1~n(port number) @@ -34,12 +34,12 @@ thread can decide to pass through the commands to the user space (ksmbd.mountd), currently DCE/RPC commands are identified to be handled through the user space. To further utilize the linux kernel, it has been chosen to process the commands as workitems and to be executed in the handlers of the ksmbd-io kworker threads. -It allows for multiplexing of the handlers as the kernel take care of initiating +It allows for multiplexing of the handlers as the kernel takes care of initiating extra worker threads if the load is increased and vice versa, if the load is -decreased it destroys the extra worker threads. So, after connection is -established with client. Dedicated ksmbd/1..n(port number) takes complete +decreased it destroys the extra worker threads. So, after the connection is +established with the client. Dedicated ksmbd/1..n(port number) takes complete ownership of receiving/parsing of SMB commands. Each received command is worked -in parallel i.e., There can be multiple clients commands which are worked in +in parallel i.e., there can be multiple client commands which are worked in parallel. After receiving each command a separated kernel workitem is prepared for each command which is further queued to be handled by ksmbd-io kworkers. So, each SMB workitem is queued to the kworkers. This allows the benefit of load @@ -49,9 +49,9 @@ performance by handling client commands in parallel. ksmbd.mountd (user space daemon) -------------------------------- -ksmbd.mountd is userspace process to, transfer user account and password that +ksmbd.mountd is a userspace process to, transfer the user account and password that are registered using ksmbd.adduser (part of utils for user space). Further it -allows sharing information parameters that parsed from smb.conf to ksmbd in +allows sharing information parameters that are parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the requests (dcerpc and share/user info). It handles RPC calls (at a minimum few @@ -124,7 +124,7 @@ How to run 1. Download ksmbd-tools(https://github.com/cifsd-team/ksmbd-tools/releases) and compile them. - - Refer README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) + - Refer to README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) to know how to use ksmbd.mountd/adduser/addshare/control utils $ ./autogen.sh @@ -133,7 +133,7 @@ How to run 2. Create /usr/local/etc/ksmbd/ksmbd.conf file, add SMB share in ksmbd.conf file. - - Refer ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage + - Refer to ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage for details to configure shares. $ man ksmbd.conf @@ -145,7 +145,7 @@ How to run $ man ksmbd.adduser $ sudo ksmbd.adduser -a -4. Insert ksmbd.ko module after build your kernel. No need to load module +4. Insert the ksmbd.ko module after you build your kernel. No need to load the module if ksmbd is built into the kernel. - Set ksmbd in menuconfig(e.g. $ make menuconfig) @@ -175,7 +175,7 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of the components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" 3. Show what prints are enabled. diff --git a/Documentation/gpu/amdgpu/display/dcn-blocks.rst b/Documentation/gpu/amdgpu/display/dcn-blocks.rst index a3fbd3ea028bba6fc256b0e018f512606fd57752..5e34366f6dbe4d57cd2f58448b628b163d32071f 100644 --- a/Documentation/gpu/amdgpu/display/dcn-blocks.rst +++ b/Documentation/gpu/amdgpu/display/dcn-blocks.rst @@ -8,37 +8,22 @@ and the code documentation when it is automatically generated. DCHUBBUB -------- -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :internal: - HUBP ---- .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :internal: - DPP --- -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h :internal: MPC @@ -47,11 +32,9 @@ MPC .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h - :export: - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h :internal: + :no-identifiers: mpcc_blnd_cfg mpcc_alpha_blend_mode OPP --- @@ -59,20 +42,14 @@ OPP .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h - :export: - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h :internal: DIO --- -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c :doc: overview -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h - :export: - -.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c :internal: diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index 67a811e6891fb3b0266bb253fd4c8997501fab8d..b269ff3f7a545d84b2f930bce3821f9221e0c0f8 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -132,7 +132,7 @@ The DRM blend mode and its elements are then mapped by AMDGPU display manager (MPC), as follows: .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h - :functions: mpcc_blnd_cfg + :identifiers: mpcc_blnd_cfg Therefore, the blending configuration for a single MPCC instance on the MPC tree is defined by :c:type:`mpcc_blnd_cfg`, where @@ -144,7 +144,7 @@ alpha and plane alpha values. It sets one of the three modes for :c:type:`MPCC_ALPHA_BLND_MODE`, as described below. .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h - :functions: mpcc_alpha_blend_mode + :identifiers: mpcc_alpha_blend_mode DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM blend formula, as follows: diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 495e35fcfb218c8bc667011a43c2bd9b74229bbf..ea21fe135b97eb26f2d90967a31128799e92f2e7 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1753,6 +1753,7 @@ operations: request: attributes: - header + - context reply: attributes: - header @@ -1761,7 +1762,6 @@ operations: - indir - hkey - input_xfrm - dump: *rss-get-op - name: plca-get-cfg doc: Get PLCA params. diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 3ab423b80e913c2b7293727307777827c6e26449..d5f246aceb9f481a5ad0db84937aada30e1e81dd 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1875,6 +1875,7 @@ Kernel response contents: ===================================== ====== ========================== ``ETHTOOL_A_RSS_HEADER`` nested reply header + ``ETHTOOL_A_RSS_CONTEXT`` u32 context number ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 1497e80f030e0007f1e4905fe37f23ffa78d0167..3fc63f27c226d8b3d563e9d3e6379ef66e48a3e4 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -89,14 +89,7 @@ docs on :ref:`Building Linux with Clang/LLVM `. Rust (optional) --------------- -A particular version of the Rust toolchain is required. Newer versions may or -may not work because the kernel depends on some unstable Rust features, for -the moment. - -Each Rust toolchain comes with several "components", some of which are required -(like ``rustc``) and some that are optional. The ``rust-src`` component (which -is optional) needs to be installed to build the kernel. Other components are -useful for developing. +A recent version of the Rust compiler is required. Please see Documentation/rust/quick-start.rst for instructions on how to satisfy the build requirements of Rust support. In particular, the ``Makefile`` diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 6e9a4597bf2cbbb9ddb3d9b727c07150efa37f25..daebce49cfdf5ff7d59ea1c23e00d2493d5238a0 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -13,9 +13,9 @@ kernel. Hardware issues like Meltdown, Spectre, L1TF etc. must be treated differently because they usually affect all Operating Systems ("OS") and therefore need coordination across different OS vendors, distributions, -hardware vendors and other parties. For some of the issues, software -mitigations can depend on microcode or firmware updates, which need further -coordination. +silicon vendors, hardware integrators, and other parties. For some of the +issues, software mitigations can depend on microcode or firmware updates, +which need further coordination. .. _Contact: @@ -32,8 +32,8 @@ Linux kernel security team (:ref:`Documentation/admin-guide/ `) instead. The team can be contacted by email at . This -is a private list of security officers who will help you to coordinate a -fix according to our documented process. +is a private list of security officers who will help you coordinate a fix +according to our documented process. The list is encrypted and email to the list can be sent by either PGP or S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME @@ -43,7 +43,7 @@ the following URLs: - PGP: https://www.kernel.org/static/files/hardware-security.asc - S/MIME: https://www.kernel.org/static/files/hardware-security.crt -While hardware security issues are often handled by the affected hardware +While hardware security issues are often handled by the affected silicon vendor, we welcome contact from researchers or individuals who have identified a potential hardware flaw. @@ -65,7 +65,7 @@ of Linux Foundation's IT operations personnel technically have the ability to access the embargoed information, but are obliged to confidentiality by their employment contract. Linux Foundation IT personnel are also responsible for operating and managing the rest of -kernel.org infrastructure. +kernel.org's infrastructure. The Linux Foundation's current director of IT Project infrastructure is Konstantin Ryabitsev. @@ -85,7 +85,7 @@ Memorandum of Understanding The Linux kernel community has a deep understanding of the requirement to keep hardware security issues under embargo for coordination between -different OS vendors, distributors, hardware vendors and other parties. +different OS vendors, distributors, silicon vendors, and other parties. The Linux kernel community has successfully handled hardware security issues in the past and has the necessary mechanisms in place to allow @@ -103,11 +103,11 @@ the issue in the best technical way. All involved developers pledge to adhere to the embargo rules and to keep the received information confidential. Violation of the pledge will lead to immediate exclusion from the current issue and removal from all related -mailing-lists. In addition, the hardware security team will also exclude +mailing lists. In addition, the hardware security team will also exclude the offender from future issues. The impact of this consequence is a highly effective deterrent in our community. In case a violation happens the hardware security team will inform the involved parties immediately. If you -or anyone becomes aware of a potential violation, please report it +or anyone else becomes aware of a potential violation, please report it immediately to the Hardware security officers. @@ -124,14 +124,16 @@ method for these types of issues. Start of Disclosure """"""""""""""""""" -Disclosure starts by contacting the Linux kernel hardware security team by -email. This initial contact should contain a description of the problem and -a list of any known affected hardware. If your organization builds or -distributes the affected hardware, we encourage you to also consider what -other hardware could be affected. +Disclosure starts by emailing the Linux kernel hardware security team per +the Contact section above. This initial contact should contain a +description of the problem and a list of any known affected silicon. If +your organization builds or distributes the affected hardware, we encourage +you to also consider what other hardware could be affected. The disclosing +party is responsible for contacting the affected silicon vendors in a +timely manner. The hardware security team will provide an incident-specific encrypted -mailing-list which will be used for initial discussion with the reporter, +mailing list which will be used for initial discussion with the reporter, further disclosure, and coordination of fixes. The hardware security team will provide the disclosing party a list of @@ -158,8 +160,8 @@ This serves several purposes: - The disclosed entities can be contacted to name experts who should participate in the mitigation development. - - If an expert which is required to handle an issue is employed by an - listed entity or member of an listed entity, then the response teams can + - If an expert who is required to handle an issue is employed by a listed + entity or member of an listed entity, then the response teams can request the disclosure of that expert from that entity. This ensures that the expert is also part of the entity's response team. @@ -169,8 +171,8 @@ Disclosure The disclosing party provides detailed information to the initial response team via the specific encrypted mailing-list. -From our experience the technical documentation of these issues is usually -a sufficient starting point and further technical clarification is best +From our experience, the technical documentation of these issues is usually +a sufficient starting point, and further technical clarification is best done via email. Mitigation development @@ -179,57 +181,93 @@ Mitigation development The initial response team sets up an encrypted mailing-list or repurposes an existing one if appropriate. -Using a mailing-list is close to the normal Linux development process and -has been successfully used in developing mitigations for various hardware +Using a mailing list is close to the normal Linux development process and +has been successfully used to develop mitigations for various hardware security issues in the past. -The mailing-list operates in the same way as normal Linux development. -Patches are posted, discussed and reviewed and if agreed on applied to a -non-public git repository which is only accessible to the participating +The mailing list operates in the same way as normal Linux development. +Patches are posted, discussed, and reviewed and if agreed upon, applied to +a non-public git repository which is only accessible to the participating developers via a secure connection. The repository contains the main development branch against the mainline kernel and backport branches for stable kernel versions as necessary. The initial response team will identify further experts from the Linux -kernel developer community as needed. Bringing in experts can happen at any -time of the development process and needs to be handled in a timely manner. +kernel developer community as needed. Any involved party can suggest +further experts to be included, each of which will be subject to the same +requirements outlined above. -If an expert is employed by or member of an entity on the disclosure list +Bringing in experts can happen at any time in the development process and +needs to be handled in a timely manner. + +If an expert is employed by or a member of an entity on the disclosure list provided by the disclosing party, then participation will be requested from the relevant entity. -If not, then the disclosing party will be informed about the experts +If not, then the disclosing party will be informed about the experts' participation. The experts are covered by the Memorandum of Understanding -and the disclosing party is requested to acknowledge the participation. In -case that the disclosing party has a compelling reason to object, then this -objection has to be raised within five work days and resolved with the -incident team immediately. If the disclosing party does not react within -five work days this is taken as silent acknowledgement. +and the disclosing party is requested to acknowledge their participation. +In the case where the disclosing party has a compelling reason to object, +any objection must to be raised within five working days and resolved with +the incident team immediately. If the disclosing party does not react +within five working days this is taken as silent acknowledgment. -After acknowledgement or resolution of an objection the expert is disclosed -by the incident team and brought into the development process. +After the incident team acknowledges or resolves an objection, the expert +is disclosed and brought into the development process. List participants may not communicate about the issue outside of the private mailing list. List participants may not use any shared resources (e.g. employer build farms, CI systems, etc) when working on patches. +Early access +"""""""""""" + +The patches discussed and developed on the list can neither be distributed +to any individual who is not a member of the response team nor to any other +organization. + +To allow the affected silicon vendors to work with their internal teams and +industry partners on testing, validation, and logistics, the following +exception is provided: + + Designated representatives of the affected silicon vendors are + allowed to hand over the patches at any time to the silicon + vendor’s response team. The representative must notify the kernel + response team about the handover. The affected silicon vendor must + have and maintain their own documented security process for any + patches shared with their response team that is consistent with + this policy. + + The silicon vendor’s response team can distribute these patches to + their industry partners and to their internal teams under the + silicon vendor’s documented security process. Feedback from the + industry partners goes back to the silicon vendor and is + communicated by the silicon vendor to the kernel response team. + + The handover to the silicon vendor’s response team removes any + responsibility or liability from the kernel response team regarding + premature disclosure, which happens due to the involvement of the + silicon vendor’s internal teams or industry partners. The silicon + vendor guarantees this release of liability by agreeing to this + process. Coordinated release """"""""""""""""""" -The involved parties will negotiate the date and time where the embargo -ends. At that point the prepared mitigations are integrated into the -relevant kernel trees and published. There is no pre-notification process: -fixes are published in public and available to everyone at the same time. +The involved parties will negotiate the date and time when the embargo +ends. At that point, the prepared mitigations are published into the +relevant kernel trees. There is no pre-notification process: the +mitigations are published in public and available to everyone at the same +time. While we understand that hardware security issues need coordinated embargo -time, the embargo time should be constrained to the minimum time which is -required for all involved parties to develop, test and prepare the +time, the embargo time should be constrained to the minimum time that is +required for all involved parties to develop, test, and prepare their mitigations. Extending embargo time artificially to meet conference talk -dates or other non-technical reasons is creating more work and burden for -the involved developers and response teams as the patches need to be kept -up to date in order to follow the ongoing upstream kernel development, -which might create conflicting changes. +dates or other non-technical reasons creates more work and burden for the +involved developers and response teams as the patches need to be kept up to +date in order to follow the ongoing upstream kernel development, which +might create conflicting changes. CVE assignment """""""""""""" @@ -275,34 +313,35 @@ an involved disclosed party. The current ambassadors list: If you want your organization to be added to the ambassadors list, please contact the hardware security team. The nominated ambassador has to -understand and support our process fully and is ideally well connected in +understand and support our process fully and is ideally well-connected in the Linux kernel community. Encrypted mailing-lists ----------------------- -We use encrypted mailing-lists for communication. The operating principle +We use encrypted mailing lists for communication. The operating principle of these lists is that email sent to the list is encrypted either with the -list's PGP key or with the list's S/MIME certificate. The mailing-list +list's PGP key or with the list's S/MIME certificate. The mailing list software decrypts the email and re-encrypts it individually for each subscriber with the subscriber's PGP key or S/MIME certificate. Details -about the mailing-list software and the setup which is used to ensure the +about the mailing list software and the setup that is used to ensure the security of the lists and protection of the data can be found here: https://korg.wiki.kernel.org/userdoc/remail. List keys ^^^^^^^^^ -For initial contact see :ref:`Contact`. For incident specific mailing-lists -the key and S/MIME certificate are conveyed to the subscribers by email -sent from the specific list. +For initial contact see the :ref:`Contact` section above. For incident +specific mailing lists, the key and S/MIME certificate are conveyed to the +subscribers by email sent from the specific list. -Subscription to incident specific lists +Subscription to incident-specific lists ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Subscription is handled by the response teams. Disclosed parties who want -to participate in the communication send a list of potential subscribers to -the response team so the response team can validate subscription requests. +Subscription to incident-specific lists is handled by the response teams. +Disclosed parties who want to participate in the communication send a list +of potential experts to the response team so the response team can validate +subscription requests. Each subscriber needs to send a subscription request to the response team by email. The email must be signed with the subscriber's PGP key or S/MIME diff --git a/Documentation/rust/general-information.rst b/Documentation/rust/general-information.rst index 4bb6ac12d482c5c98a03de072f318254e3d941cb..e3f388ef4ee42332f26005151fcff53b817d48ef 100644 --- a/Documentation/rust/general-information.rst +++ b/Documentation/rust/general-information.rst @@ -7,6 +7,14 @@ This document contains useful information to know when working with the Rust support in the kernel. +``no_std`` +---------- + +The Rust support in the kernel can link only `core `_, +but not `std `_. Crates for use in the +kernel must opt into this behavior using the ``#![no_std]`` attribute. + + Code documentation ------------------ diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst index cc3f11e0d441f8fabe165be13bdee9055cbce712..d06a36106cd458cc48060448c83cbe6f51622e50 100644 --- a/Documentation/rust/quick-start.rst +++ b/Documentation/rust/quick-start.rst @@ -5,17 +5,93 @@ Quick Start This document describes how to get started with kernel development in Rust. +There are a few ways to install a Rust toolchain needed for kernel development. +A simple way is to use the packages from your Linux distribution if they are +suitable -- the first section below explains this approach. An advantage of this +approach is that, typically, the distribution will match the LLVM used by Rust +and Clang. + +Another way is using the prebuilt stable versions of LLVM+Rust provided on +`kernel.org `_. These are the same slim +and fast LLVM toolchains from :ref:`Getting LLVM ` with versions +of Rust added to them that Rust for Linux supports. Two sets are provided: the +"latest LLVM" and "matching LLVM" (please see the link for more information). + +Alternatively, the next two "Requirements" sections explain each component and +how to install them through ``rustup``, the standalone installers from Rust +and/or building them. + +The rest of the document explains other aspects on how to get started. + + +Distributions +------------- + +Arch Linux +********** + +Arch Linux provides recent Rust releases and thus it should generally work out +of the box, e.g.:: + + pacman -S rust rust-src rust-bindgen + + +Debian +****** + +Debian Unstable (Sid), outside of the freeze period, provides recent Rust +releases and thus it should generally work out of the box, e.g.:: + + apt install rustc rust-src bindgen rustfmt rust-clippy + + +Fedora Linux +************ + +Fedora Linux provides recent Rust releases and thus it should generally work out +of the box, e.g.:: + + dnf install rust rust-src bindgen-cli rustfmt clippy + + +Gentoo Linux +************ + +Gentoo Linux (and especially the testing branch) provides recent Rust releases +and thus it should generally work out of the box, e.g.:: + + USE='rust-src rustfmt clippy' emerge dev-lang/rust dev-util/bindgen + +``LIBCLANG_PATH`` may need to be set. + + +Nix +*** + +Nix (unstable channel) provides recent Rust releases and thus it should +generally work out of the box, e.g.:: + + { pkgs ? import {} }: + pkgs.mkShell { + nativeBuildInputs = with pkgs; [ rustc rust-bindgen rustfmt clippy ]; + RUST_LIB_SRC = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}"; + } + + +openSUSE +******** + +openSUSE Slowroll and openSUSE Tumbleweed provide recent Rust releases and thus +they should generally work out of the box, e.g.:: + + zypper install rust rust1.79-src rust-bindgen clang + Requirements: Building ---------------------- This section explains how to fetch the tools needed for building. -Some of these requirements might be available from Linux distributions -under names like ``rustc``, ``rust-src``, ``rust-bindgen``, etc. However, -at the time of writing, they are likely not to be recent enough unless -the distribution tracks the latest releases. - To easily check whether the requirements are met, the following target can be used:: @@ -29,16 +105,15 @@ if that is the case. rustc ***** -A particular version of the Rust compiler is required. Newer versions may or -may not work because, for the moment, the kernel depends on some unstable -Rust features. +A recent version of the Rust compiler is required. If ``rustup`` is being used, enter the kernel build directory (or use -``--path=`` argument to the ``set`` sub-command) and run:: +``--path=`` argument to the ``set`` sub-command) and run, +for instance:: - rustup override set $(scripts/min-tool-version.sh rustc) + rustup override set stable -This will configure your working directory to use the correct version of +This will configure your working directory to use the given version of ``rustc`` without affecting your default toolchain. Note that the override applies to the current working directory (and its @@ -65,9 +140,9 @@ version later on requires re-adding the component. Otherwise, if a standalone installer is used, the Rust source tree may be downloaded into the toolchain's installation folder:: - curl -L "https://static.rust-lang.org/dist/rust-src-$(scripts/min-tool-version.sh rustc).tar.gz" | + curl -L "https://static.rust-lang.org/dist/rust-src-$(rustc --version | cut -d' ' -f2).tar.gz" | tar -xzf - -C "$(rustc --print sysroot)/lib" \ - "rust-src-$(scripts/min-tool-version.sh rustc)/rust-src/lib/" \ + "rust-src-$(rustc --version | cut -d' ' -f2)/rust-src/lib/" \ --strip-components=3 In this case, upgrading the Rust compiler version later on requires manually @@ -101,26 +176,22 @@ bindgen ******* The bindings to the C side of the kernel are generated at build time using -the ``bindgen`` tool. A particular version is required. +the ``bindgen`` tool. -Install it via (note that this will download and build the tool from source):: +Install it, for instance, via (note that this will download and build the tool +from source):: - cargo install --locked --version $(scripts/min-tool-version.sh bindgen) bindgen-cli + cargo install --locked bindgen-cli -``bindgen`` needs to find a suitable ``libclang`` in order to work. If it is -not found (or a different ``libclang`` than the one found should be used), -the process can be tweaked using the environment variables understood by -``clang-sys`` (the Rust bindings crate that ``bindgen`` uses to access -``libclang``): +``bindgen`` uses the ``clang-sys`` crate to find a suitable ``libclang`` (which +may be linked statically, dynamically or loaded at runtime). By default, the +``cargo`` command above will produce a ``bindgen`` binary that will load +``libclang`` at runtime. If it is not found (or a different ``libclang`` than +the one found should be used), the process can be tweaked, e.g. by using the +``LIBCLANG_PATH`` environment variable. For details, please see ``clang-sys``'s +documentation at: -* ``LLVM_CONFIG_PATH`` can be pointed to an ``llvm-config`` executable. - -* Or ``LIBCLANG_PATH`` can be pointed to a ``libclang`` shared library - or to the directory containing it. - -* Or ``CLANG_PATH`` can be pointed to a ``clang`` executable. - -For details, please see ``clang-sys``'s documentation at: + https://github.com/KyleMayes/clang-sys#linking https://github.com/KyleMayes/clang-sys#environment-variables @@ -164,20 +235,6 @@ can be installed manually:: The standalone installers also come with ``clippy``. -cargo -***** - -``cargo`` is the Rust native build system. It is currently required to run -the tests since it is used to build a custom standard library that contains -the facilities provided by the custom ``alloc`` in the kernel. The tests can -be run using the ``rusttest`` Make target. - -If ``rustup`` is being used, all the profiles already install the tool, -thus nothing needs to be done. - -The standalone installers also come with ``cargo``. - - rustdoc ******* diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst index acfd0c2be48d008441de6c58d9afd0b7691b5cf5..568b71b415a4525772cb77ce76c7f9da32fac1c4 100644 --- a/Documentation/rust/testing.rst +++ b/Documentation/rust/testing.rst @@ -131,9 +131,8 @@ Additionally, there are the ``#[test]`` tests. These can be run using the make LLVM=1 rusttest -This requires the kernel ``.config`` and downloads external repositories. It -runs the ``#[test]`` tests on the host (currently) and thus is fairly limited in -what these tests can test. +This requires the kernel ``.config``. It runs the ``#[test]`` tests on the host +(currently) and thus is fairly limited in what these tests can test. The Kselftests -------------- diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst index f02e6cf3516a2047ada5779b09d2f0441306d148..74df19be91f69dec49951566d13bb52fbb637988 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst @@ -21,9 +21,9 @@ are often referred to as greyscale formats. .. raw:: latex - \scriptsize + \tiny -.. tabularcolumns:: |p{3.6cm}|p{3.0cm}|p{1.3cm}|p{2.6cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| +.. tabularcolumns:: |p{3.6cm}|p{2.4cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| .. flat-table:: Luma-Only Image Formats :header-rows: 1 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fe722c5dada9f81e189a3e87426fda3883bbbd06..b3be87489108e8a3ebac587361881632f53154f5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2592,7 +2592,7 @@ Specifically: 0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT] 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND] 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ] - 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] + 0x6030 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] [1]_ 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] [1]_ ... @@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap). See KVM_SET_USER_MEMORY_REGION2 for additional details. 4.143 KVM_PRE_FAULT_MEMORY ------------------------- +--------------------------- :Capability: KVM_CAP_PRE_FAULT_MEMORY :Architectures: none @@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a stage-2 read page fault, e.g. faults in memory as needed, but doesn't break CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed. +In the case of confidential VM types where there is an initial set up of +private guest memory before the guest is 'finalized'/measured, this ioctl +should only be issued after completing all the necessary setup to put the +guest into a 'finalized' state so that the above semantics can be reliably +ensured. + In some cases, multiple vCPUs might share the page tables. In this case, the ioctl can be called in parallel. diff --git a/Documentation/wmi/devices/msi-wmi-platform.rst b/Documentation/wmi/devices/msi-wmi-platform.rst index 29b1b2e6d42cdc2f37522c555cd1daa152c7d5f2..31a13694289238a04d9099718f55d6d86b06d678 100644 --- a/Documentation/wmi/devices/msi-wmi-platform.rst +++ b/Documentation/wmi/devices/msi-wmi-platform.rst @@ -130,12 +130,12 @@ data using the `bmfdec `_ utility: Due to a peculiarity in how Windows handles the ``CreateByteField()`` ACPI operator (errors only happen when a invalid byte field is ultimately accessed), all methods require a 32 byte input -buffer, even if the Binay MOF says otherwise. +buffer, even if the Binary MOF says otherwise. The input buffer contains a single byte to select the subfeature to be accessed and 31 bytes of input data, the meaning of which depends on the subfeature being accessed. -The output buffer contains a singe byte which signals success or failure (``0x00`` on failure) +The output buffer contains a single byte which signals success or failure (``0x00`` on failure) and 31 bytes of output data, the meaning if which depends on the subfeature being accessed. WMI method Get_EC() @@ -147,7 +147,7 @@ data contains a flag byte and a 28 byte controller firmware version string. The first 4 bits of the flag byte contain the minor version of the embedded controller interface, with the next 2 bits containing the major version of the embedded controller interface. -The 7th bit signals if the embedded controller page chaged (exact meaning is unknown), and the +The 7th bit signals if the embedded controller page changed (exact meaning is unknown), and the last bit signals if the platform is a Tigerlake platform. The MSI software seems to only use this interface when the last bit is set. diff --git a/MAINTAINERS b/MAINTAINERS index c0a3d9e936898baa1abbc7e3d42a14b5e1d13f84..f328373463b0d2c19d6b451a684f18e4ab7132e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2196,7 +2196,7 @@ N: digicolor ARM/CORESIGHT FRAMEWORK AND DRIVERS M: Suzuki K Poulose R: Mike Leach -R: James Clark +R: James Clark L: coresight@lists.linaro.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -5306,7 +5306,7 @@ F: drivers/media/cec/i2c/ch7322.c CIRRUS LOGIC AUDIO CODEC DRIVERS M: David Rhodes M: Richard Fitzgerald -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Maintained F: Documentation/devicetree/bindings/sound/cirrus,cs* @@ -5375,7 +5375,7 @@ F: sound/soc/codecs/lochnagar-sc.c CIRRUS LOGIC MADERA CODEC DRIVERS M: Charles Keepax M: Richard Fitzgerald -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Supported W: https://github.com/CirrusLogic/linux-drivers/wiki @@ -5613,6 +5613,7 @@ M: Ira Weiny M: Dan Williams L: linux-cxl@vger.kernel.org S: Maintained +F: Documentation/driver-api/cxl F: drivers/cxl/ F: include/linux/einj-cxl.h F: include/linux/cxl-event.h @@ -10655,6 +10656,7 @@ F: Documentation/ABI/testing/sysfs-bus-i3c F: Documentation/devicetree/bindings/i3c/ F: Documentation/driver-api/i3c F: drivers/i3c/ +F: include/dt-bindings/i3c/ F: include/linux/i3c/ IBM Operation Panel Input Driver @@ -13322,14 +13324,16 @@ F: Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt F: drivers/i2c/muxes/i2c-mux-ltc4306.c LTP (Linux Test Project) +M: Andrea Cervesato M: Cyril Hrubis M: Jan Stancek M: Petr Vorel M: Li Wang M: Yang Xu +M: Xiao Yang L: ltp@lists.linux.it (subscribers-only) S: Maintained -W: http://linux-test-project.github.io/ +W: https://linux-test-project.readthedocs.io/ T: git https://github.com/linux-test-project/ltp.git LTR390 AMBIENT/UV LIGHT SENSOR DRIVER @@ -13537,7 +13541,7 @@ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner M: Stephen Hemminger L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER @@ -15934,6 +15938,7 @@ F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h F: include/linux/netdevice.h +F: include/linux/skbuff.h F: include/net/ F: include/uapi/linux/in.h F: include/uapi/linux/net.h @@ -17894,7 +17899,7 @@ F: tools/perf/ PERFORMANCE EVENTS TOOLING ARM64 R: John Garry R: Will Deacon -R: James Clark +R: James Clark R: Mike Leach R: Leo Yan L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -18554,7 +18559,7 @@ F: drivers/usb/misc/qcom_eud.c QCOM IPA DRIVER M: Alex Elder L: netdev@vger.kernel.org -S: Supported +S: Maintained F: drivers/net/ipa/ QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT diff --git a/Makefile b/Makefile index 11e74386d909dbce5acf5dc5553cb6cfbbe05b0e..68ebd6d6b444df00aedb3d09baa4e484977f5eb0 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 10 +PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -445,17 +445,17 @@ KBUILD_USERLDFLAGS := $(USERLDFLAGS) # host programs. export rust_common_flags := --edition=2021 \ -Zbinary_dep_depinfo=y \ - -Dunsafe_op_in_unsafe_fn -Drust_2018_idioms \ - -Dunreachable_pub -Dnon_ascii_idents \ + -Dunsafe_op_in_unsafe_fn \ + -Dnon_ascii_idents \ + -Wrust_2018_idioms \ + -Wunreachable_pub \ -Wmissing_docs \ - -Drustdoc::missing_crate_level_docs \ - -Dclippy::correctness -Dclippy::style \ - -Dclippy::suspicious -Dclippy::complexity \ - -Dclippy::perf \ - -Dclippy::let_unit_value -Dclippy::mut_mut \ - -Dclippy::needless_bitwise_bool \ - -Dclippy::needless_continue \ - -Dclippy::no_mangle_with_rust_abi \ + -Wrustdoc::missing_crate_level_docs \ + -Wclippy::all \ + -Wclippy::mut_mut \ + -Wclippy::needless_bitwise_bool \ + -Wclippy::needless_continue \ + -Wclippy::no_mangle_with_rust_abi \ -Wclippy::dbg_macro KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) \ @@ -493,7 +493,6 @@ RUSTDOC = rustdoc RUSTFMT = rustfmt CLIPPY_DRIVER = clippy-driver BINDGEN = bindgen -CARGO = cargo PAHOLE = pahole RESOLVE_BTFIDS = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids LEX = flex @@ -559,7 +558,7 @@ KBUILD_RUSTFLAGS := $(rust_common_flags) \ -Csymbol-mangling-version=v0 \ -Crelocation-model=static \ -Zfunction-sections=n \ - -Dclippy::float_arithmetic + -Wclippy::float_arithmetic KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := @@ -587,7 +586,7 @@ endif export RUSTC_BOOTSTRAP := 1 export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG -export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO +export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX @@ -1959,9 +1958,12 @@ quiet_cmd_tags = GEN $@ tags TAGS cscope gtags: FORCE $(call cmd,tags) -# IDE support targets +# Generate rust-project.json (a file that describes the structure of non-Cargo +# Rust projects) for rust-analyzer (an implementation of the Language Server +# Protocol). PHONY += rust-analyzer rust-analyzer: + +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh $(Q)$(MAKE) $(build)=rust $@ # Script to generate missing namespace dependencies diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 2bb8cbeedf9160778a7bff1a444f8bc925612749..b191d87f89c4012849276b7b7b67c69ed974778a 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -534,8 +534,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define ioread16be(p) swab16(ioread16(p)) #define ioread32be(p) swab32(ioread32(p)) +#define ioread64be(p) swab64(ioread64(p)) #define iowrite16be(v,p) iowrite16(swab16(v), (p)) #define iowrite32be(v,p) iowrite32(swab32(v), (p)) +#define iowrite64be(v,p) iowrite64(swab64(v), (p)) #define inb_p inb #define inw_p inw @@ -634,8 +636,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); */ #define ioread64 ioread64 #define iowrite64 iowrite64 -#define ioread64be ioread64be -#define iowrite64be iowrite64be #define ioread8_rep ioread8_rep #define ioread16_rep ioread16_rep #define ioread32_rep ioread32_rep diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 954a1916a5003bcbb60f3766fad981d6a0b4e8e2..54b2bb817a7fc0615ad94b88fd4698512f29c0c5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -87,6 +87,7 @@ config ARM select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE @@ -116,6 +117,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL @@ -736,7 +738,7 @@ config ARM_ERRATA_764319 bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction" depends on CPU_V7 help - This option enables the workaround for the 764319 Cortex A-9 erratum. + This option enables the workaround for the 764319 Cortex-A9 erratum. CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an unexpected Undefined Instruction exception when the DBGSWENABLE external pin is set to 0, even when the CP14 accesses are performed diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6bca03c0c7f0e28e832187366d96b77ea0dfc614..945b5975fce20008c9c5ed15e2fb05894357efd9 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -9,6 +9,7 @@ OBJS = HEAD = head.o OBJS += misc.o decompress.o +CFLAGS_decompress.o += $(DISABLE_STACKLEAK_PLUGIN) ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o AFLAGS_head.o += -DDEBUG diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 3fcb3e62dc5692c5ea8ccdb47d97c169d7b0ac76..d411abd4310ea97f14401cd73b21d77250af095f 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -125,7 +125,7 @@ SECTIONS . = BSS_START; __bss_start = .; - .bss : { *(.bss) } + .bss : { *(.bss .bss.*) } _end = .; . = ALIGN(8); /* the stack must be 64-bit aligned */ diff --git a/arch/arm/boot/dts/arm/versatile-ab.dts b/arch/arm/boot/dts/arm/versatile-ab.dts index 6fe6b49f5d8ecfe1266a36130864697c909f5c81..635ab92688999aa9f1dff02ba59860104d349b5f 100644 --- a/arch/arm/boot/dts/arm/versatile-ab.dts +++ b/arch/arm/boot/dts/arm/versatile-ab.dts @@ -157,7 +157,7 @@ clocks = <&xtal24mhz>; }; - pclk: clock-24000000 { + pclk: clock-pclk { #clock-cells = <0>; compatible = "fixed-factor-clock"; clock-div = <1>; diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 360f0d2406bfa08364086e885157a3a02459c277..f80a85b091d6c4ff365e15ae7100af1c3aed597f 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -26,6 +26,13 @@ struct stackframe { #endif }; +static inline bool on_thread_stack(void) +{ + unsigned long delta = current_stack_pointer ^ (unsigned long)current->stack; + + return delta < THREAD_SIZE; +} + static __always_inline void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) { diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4c8632d5c432c3c7e9351ebae65a90ee9a4c31db..d60f6e83a9f700f3fc5fe1e08b09c7263f49bf79 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -42,7 +42,7 @@ #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ - *(.proc.info.init) \ + KEEP(*(.proc.info.init)) \ __proc_info_end = .; #define IDMAP_TEXT \ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 6150a716828c3eca335ed52df75fe7fdcf40eb57..f01d23a220e65d20789a7aeebeffb629fcaf126b 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1065,6 +1065,7 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) @@ -1078,6 +1079,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) #ifdef CONFIG_HARDEN_BRANCH_HISTORY .section .vectors.bhb.loop8, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_loop8_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) @@ -1090,6 +1092,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) W(b) vector_bhb_loop8_fiq .section .vectors.bhb.bpiall, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_bpiall_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 5c31e9de7a602e22a65f64c12cf2819be9fa56d6..f379c852dcb72fa5e960bba5621cc2152f0df0d3 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -119,6 +119,9 @@ no_work_pending: ct_user_enter save = 0 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase_on_task_stack +#endif restore_user_regs fast = 0, offset = 0 ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 677f218f7e841acc1f7c392a3216e2bd848413e8..da488d92e7a00d0230d7b10c623a57773371ab3f 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -395,11 +395,6 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, return 0; } -struct mod_unwind_map { - const Elf_Shdr *unw_sec; - const Elf_Shdr *txt_sec; -}; - static const Elf_Shdr *find_mod_section(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) { diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 7147edbe56c67c41915c5fa8ae11a3b5493261d3..1d230ac9d0eb5c86fd5d0516b9b4b07c1f9b173b 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -85,8 +85,7 @@ static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, pc); - return true; + return perf_callchain_store(entry, pc) == 0; } void diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index c16d196b5aad3e159c160295fd21c75c3cb7055b..5eddb75a7174083f7bf143798f339171540c8475 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -63,7 +63,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -83,7 +83,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bd9127c4b451015bb67b64cc273ff10ebb68a398..de373c6c2ae8780f190bef7357c4d70f3b3064cc 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -99,7 +99,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { @@ -116,7 +116,7 @@ SECTIONS #endif .init.pv_table : { __pv_table_begin = .; - *(.pv_table) + KEEP(*(.pv_table)) __pv_table_end = .; } diff --git a/arch/arm/mach-alpine/alpine_cpu_pm.c b/arch/arm/mach-alpine/alpine_cpu_pm.c index 13ae8412e9ce6451cbf1820c07db66425679d65b..b48da6f12b6cc46a9b619bfeb8630ea94dc6f21e 100644 --- a/arch/arm/mach-alpine/alpine_cpu_pm.c +++ b/arch/arm/mach-alpine/alpine_cpu_pm.c @@ -29,7 +29,7 @@ int alpine_cpu_wakeup(unsigned int phys_cpu, uint32_t phys_resume_addr) /* * Set CPU resume address - * secure firmware running on boot will jump to this address - * after setting proper CPU mode, and initialiing e.g. secure + * after setting proper CPU mode, and initializing e.g. secure * regs (the same mode all CPUs are booted to - usually HYP) */ writel(phys_resume_addr, diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index efa6faa62a2cf0cefbfb3abd450bc176d0f49dba..1713bdf3b71e4b7541de751e32c1dd523ddf71fd 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include "udc.h" #include "gumstix.h" +#include "devices.h" #include "generic.h" @@ -99,8 +101,8 @@ static void __init gumstix_mmc_init(void) } #endif -#ifdef CONFIG_USB_PXA25X -static const struct property_entry spitz_mci_props[] __initconst = { +#if IS_ENABLED(CONFIG_USB_PXA25X) +static const struct property_entry gumstix_vbus_props[] __initconst = { PROPERTY_ENTRY_GPIO("vbus-gpios", &pxa2xx_gpiochip_node, GPIO_GUMSTIX_USB_GPIOn, GPIO_ACTIVE_HIGH), PROPERTY_ENTRY_GPIO("pullup-gpios", &pxa2xx_gpiochip_node, @@ -109,8 +111,9 @@ static const struct property_entry spitz_mci_props[] __initconst = { }; static const struct platform_device_info gumstix_gpio_vbus_info __initconst = { - .name = "gpio-vbus", - .id = PLATFORM_DEVID_NONE, + .name = "gpio-vbus", + .id = PLATFORM_DEVID_NONE, + .properties = gumstix_vbus_props, }; static void __init gumstix_udc_init(void) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index c30df1097c524b4c701834e093775662802d9341..9f7454b8efa79e9c3c1add236242326d0e946710 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1109,7 +1109,7 @@ void ecard_remove_driver(struct ecard_driver *drv) driver_unregister(&drv->drv); } -static int ecard_match(struct device *_dev, struct device_driver *_drv) +static int ecard_match(struct device *_dev, const struct device_driver *_drv) { struct expansion_card *ec = ECARD_DEV(_dev); struct ecard_driver *drv = ECARD_DRV(_drv); diff --git a/arch/arm/mm/proc.c b/arch/arm/mm/proc.c index bdbbf65d1b3669c5e05708b20c166683fb8c8f41..2027845efefb608fb4bb1bff1a744dc1ae0b8e0a 100644 --- a/arch/arm/mm/proc.c +++ b/arch/arm/mm/proc.c @@ -17,7 +17,7 @@ void cpu_arm7tdmi_proc_init(void); __ADDRESSABLE(cpu_arm7tdmi_proc_init); void cpu_arm7tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm7tdmi_proc_fin); -void cpu_arm7tdmi_reset(void); +void cpu_arm7tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm7tdmi_reset); int cpu_arm7tdmi_do_idle(void); __ADDRESSABLE(cpu_arm7tdmi_do_idle); @@ -32,7 +32,7 @@ void cpu_arm720_proc_init(void); __ADDRESSABLE(cpu_arm720_proc_init); void cpu_arm720_proc_fin(void); __ADDRESSABLE(cpu_arm720_proc_fin); -void cpu_arm720_reset(void); +void cpu_arm720_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm720_reset); int cpu_arm720_do_idle(void); __ADDRESSABLE(cpu_arm720_do_idle); @@ -49,7 +49,7 @@ void cpu_arm740_proc_init(void); __ADDRESSABLE(cpu_arm740_proc_init); void cpu_arm740_proc_fin(void); __ADDRESSABLE(cpu_arm740_proc_fin); -void cpu_arm740_reset(void); +void cpu_arm740_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm740_reset); int cpu_arm740_do_idle(void); __ADDRESSABLE(cpu_arm740_do_idle); @@ -64,7 +64,7 @@ void cpu_arm9tdmi_proc_init(void); __ADDRESSABLE(cpu_arm9tdmi_proc_init); void cpu_arm9tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm9tdmi_proc_fin); -void cpu_arm9tdmi_reset(void); +void cpu_arm9tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm9tdmi_reset); int cpu_arm9tdmi_do_idle(void); __ADDRESSABLE(cpu_arm9tdmi_do_idle); @@ -79,7 +79,7 @@ void cpu_arm920_proc_init(void); __ADDRESSABLE(cpu_arm920_proc_init); void cpu_arm920_proc_fin(void); __ADDRESSABLE(cpu_arm920_proc_fin); -void cpu_arm920_reset(void); +void cpu_arm920_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm920_reset); int cpu_arm920_do_idle(void); __ADDRESSABLE(cpu_arm920_do_idle); @@ -102,7 +102,7 @@ void cpu_arm922_proc_init(void); __ADDRESSABLE(cpu_arm922_proc_init); void cpu_arm922_proc_fin(void); __ADDRESSABLE(cpu_arm922_proc_fin); -void cpu_arm922_reset(void); +void cpu_arm922_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm922_reset); int cpu_arm922_do_idle(void); __ADDRESSABLE(cpu_arm922_do_idle); @@ -119,7 +119,7 @@ void cpu_arm925_proc_init(void); __ADDRESSABLE(cpu_arm925_proc_init); void cpu_arm925_proc_fin(void); __ADDRESSABLE(cpu_arm925_proc_fin); -void cpu_arm925_reset(void); +void cpu_arm925_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm925_reset); int cpu_arm925_do_idle(void); __ADDRESSABLE(cpu_arm925_do_idle); @@ -159,7 +159,7 @@ void cpu_arm940_proc_init(void); __ADDRESSABLE(cpu_arm940_proc_init); void cpu_arm940_proc_fin(void); __ADDRESSABLE(cpu_arm940_proc_fin); -void cpu_arm940_reset(void); +void cpu_arm940_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm940_reset); int cpu_arm940_do_idle(void); __ADDRESSABLE(cpu_arm940_do_idle); @@ -174,7 +174,7 @@ void cpu_arm946_proc_init(void); __ADDRESSABLE(cpu_arm946_proc_init); void cpu_arm946_proc_fin(void); __ADDRESSABLE(cpu_arm946_proc_fin); -void cpu_arm946_reset(void); +void cpu_arm946_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm946_reset); int cpu_arm946_do_idle(void); __ADDRESSABLE(cpu_arm946_do_idle); @@ -429,7 +429,7 @@ void cpu_v7_proc_init(void); __ADDRESSABLE(cpu_v7_proc_init); void cpu_v7_proc_fin(void); __ADDRESSABLE(cpu_v7_proc_fin); -void cpu_v7_reset(void); +void cpu_v7_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_v7_reset); int cpu_v7_do_idle(void); __ADDRESSABLE(cpu_v7_do_idle); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7fd70be0463f6ac3d8266433c07efcd3bc3f790b..a2f8ff354ca670af50e44c790c6c201b63aa0ddb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -168,9 +168,9 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) + select HAVE_ARCH_KASAN_VMALLOC + select HAVE_ARCH_KASAN_SW_TAGS + select HAVE_ARCH_KASAN_HW_TAGS if ARM64_MTE # Some instrumentation may be unsound, hence EXPERT select HAVE_ARCH_KCSAN if EXPERT select HAVE_ARCH_KFENCE @@ -211,8 +211,8 @@ config ARM64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_GCC_PLUGINS select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI @@ -1069,18 +1069,28 @@ config ARM64_ERRATUM_3117295 If unsure, say Y. config ARM64_ERRATUM_3194386 - bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" + bool "Cortex-*/Neoverse-*: workaround for MSR SSBS not self-synchronizing" default y help This option adds the workaround for the following errata: + * ARM Cortex-A76 erratum 3324349 + * ARM Cortex-A77 erratum 3324348 + * ARM Cortex-A78 erratum 3324344 + * ARM Cortex-A78C erratum 3324346 + * ARM Cortex-A78C erratum 3324347 * ARM Cortex-A710 erratam 3324338 * ARM Cortex-A720 erratum 3456091 + * ARM Cortex-A725 erratum 3456106 + * ARM Cortex-X1 erratum 3324344 + * ARM Cortex-X1C erratum 3324346 * ARM Cortex-X2 erratum 3324338 * ARM Cortex-X3 erratum 3324335 * ARM Cortex-X4 erratum 3194386 * ARM Cortex-X925 erratum 3324334 + * ARM Neoverse-N1 erratum 3324349 * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse-V1 erratum 3324341 * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 @@ -1088,11 +1098,11 @@ config ARM64_ERRATUM_3194386 subsequent speculative instructions, which may permit unexepected speculative store bypassing. - Work around this problem by placing a speculation barrier after - kernel changes to SSBS. The presence of the SSBS special-purpose - register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such - that userspace will use the PR_SPEC_STORE_BYPASS prctl to change - SSBS. + Work around this problem by placing a Speculation Barrier (SB) or + Instruction Synchronization Barrier (ISB) after kernel changes to + SSBS. The presence of the SSBS special-purpose register is hidden + from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such that userspace + will use the PR_SPEC_STORE_BYPASS prctl to change SSBS. If unsure, say Y. @@ -1471,7 +1481,6 @@ config HOTPLUG_CPU config NUMA bool "NUMA Memory Allocation and Scheduler Support" select GENERIC_ARCH_NUMA - select ACPI_NUMA if ACPI select OF_NUMA select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK @@ -2337,6 +2346,17 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config COMPRESSED_INSTALL + bool "Install compressed image by default" + help + This makes the regular "make install" install the compressed + image we built, not the legacy uncompressed one. + + You can check that a compressed image works for you by doing + "make zinstall" first, and verifying that everything is fine + in your environment before making "make install" do this for + you. + config DMI bool "Enable support for SMBIOS (DMI) tables" depends on EFI diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 3f0f35fd5bb7b13210b4605a000514a35d26dfd8..f6bc3da1ef110eb2d3539cfe977823a504c4ded0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -182,7 +182,13 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -install: KBUILD_IMAGE := $(boot)/Image +ifeq ($(CONFIG_COMPRESSED_INSTALL),y) + DEFAULT_KBUILD_IMAGE = $(KBUILD_IMAGE) +else + DEFAULT_KBUILD_IMAGE = $(boot)/Image +endif + +install: KBUILD_IMAGE := $(DEFAULT_KBUILD_IMAGE) install zinstall: $(call cmd,install) @@ -229,7 +235,7 @@ define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)' - echo ' install - Install uncompressed kernel' + echo ' install - Install kernel (compressed if COMPRESSED_INSTALL set)' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' echo ' (distribution) /sbin/installkernel or' diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi index e8f4d136e5dfb4dd67c16f450141ebf9cd498b24..9202181fbd6528e92f6f440bb0f48dc4fde2674b 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi @@ -43,15 +43,6 @@ sound-dai = <&mcasp0>; }; }; - - reg_usb_hub: regulator-usb-hub { - compatible = "regulator-fixed"; - enable-active-high; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>; - regulator-boot-on; - regulator-name = "HUB_PWR_EN"; - }; }; /* Verdin ETHs */ @@ -193,11 +184,6 @@ status = "okay"; }; -/* Do not force CTRL_SLEEP_MOCI# always enabled */ -®_force_sleep_moci { - status = "disabled"; -}; - /* Verdin SD_1 */ &sdhci1 { status = "okay"; @@ -218,15 +204,7 @@ }; &usb1 { - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - usb-hub@1 { - compatible = "usb424,2744"; - reg = <1>; - vdd-supply = <®_usb_hub>; - }; }; /* Verdin CTRL_WAKE1_MICO# */ diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi index 359f53f3e019b1dbdf01017bff1a42af0161bf44..5bef31b8577be52133a4e151727ba8579d0e51ae 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi @@ -138,12 +138,6 @@ vin-supply = <®_1v8>; }; - /* - * By default we enable CTRL_SLEEP_MOCI#, this is required to have - * peripherals on the carrier board powered. - * If more granularity or power saving is required this can be disabled - * in the carrier board device tree files. - */ reg_force_sleep_moci: regulator-force-sleep-moci { compatible = "regulator-fixed"; enable-active-high; diff --git a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi index e65db6ce02bf668ace388a7a547563584056d1d9..df7945156397b14015661193c93a006308501bed 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi @@ -146,6 +146,8 @@ power-domains = <&k3_pds 79 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 79 0>; clock-names = "gpio"; + gpio-ranges = <&mcu_pmx0 0 0 21>, <&mcu_pmx0 21 23 1>, + <&mcu_pmx0 22 32 2>; }; mcu_rti0: watchdog@4880000 { diff --git a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi index 57383bd2eaeb41cf6360411e6a08fc2fdd834bb1..0ce9721b417653850c08e8a57267bdafef66b38c 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi @@ -45,7 +45,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 92 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 22 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 149 2 PIN_GPIO_RANGE_IOPAD>; diff --git a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi index c797980528ec7c10a59703374b920d3c9ef91c9a..dde4bd5c664539dc8b24ed4d9e05d09e1bf07ba4 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi @@ -193,7 +193,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 55 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 17 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 101 25 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts index 9338d987180d7560e56e4b720b50fc208480699f..ffa38f41679d8436f570682dc6133c53f51bbef3 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts @@ -1262,6 +1262,14 @@ &serdes0 { status = "okay"; + serdes0_pcie1_link: phy@0 { + reg = <0>; + cdns,num-lanes = <2>; + #phy-cells = <0>; + cdns,phy-type = ; + resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>; + }; + serdes0_usb_link: phy@3 { reg = <3>; cdns,num-lanes = <1>; @@ -1386,23 +1394,6 @@ phys = <&transceiver3>; }; -&serdes0 { - status = "okay"; - - serdes0_pcie1_link: phy@0 { - reg = <0>; - cdns,num-lanes = <4>; - #phy-cells = <0>; - cdns,phy-type = ; - resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>, - <&serdes_wiz0 3>, <&serdes_wiz0 4>; - }; -}; - -&serdes_wiz0 { - status = "okay"; -}; - &pcie1_rc { status = "okay"; num-lanes = <2>; diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi index f170f80f00c1b84bea97d243e51866777bd84376..d4ac1c9872a5e70ff9bf9e3f6146d92fce780a34 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi @@ -2755,7 +2755,7 @@ interrupts = , ; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc500>, <&main_udmap 0x4500>; + dmas = <&main_udmap 0xc403>, <&main_udmap 0x4403>; dma-names = "tx", "rx"; clocks = <&k3_clks 268 0>; clock-names = "fck"; @@ -2773,7 +2773,7 @@ interrupts = , ; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc501>, <&main_udmap 0x4501>; + dmas = <&main_udmap 0xc404>, <&main_udmap 0x4404>; dma-names = "tx", "rx"; clocks = <&k3_clks 269 0>; clock-names = "fck"; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1cb0704c6163f8ab16420f0aff6b5429688b96f9..5fd7caea441936246ce9ddb7b5d7bdf04b30d14b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,12 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -165,12 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 4e753908b80188f0e58b50a0048e1482e32ec94f..a0a5bbae7229e8a791748ebaa193c1ccd118ff1c 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,6 +13,7 @@ #include #include +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE #define JUMP_TABLE_ENTRY(key, label) \ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index d81bac256abc3599e2603ce4d15d2c47a3d6cf6a..6199c9f7ec6eda2fa7c677cf4a755928eb747e84 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -104,7 +104,7 @@ alternative_else_nop_endif #define __ptrauth_save_key(ctxt, key) \ do { \ - u64 __val; \ + u64 __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f8efbc128446d3da79265a2ffc1137ca28030504..7a4f5604be3f75d38b9db28485ea3cc702a4ebff 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1065,6 +1065,28 @@ static inline bool pgtable_l5_enabled(void) { return false; } #define p4d_offset_kimg(dir,addr) ((p4d_t *)dir) +static inline +p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + /* + * With runtime folding of the pud, pud_offset_lockless() passes + * the 'pgd_t *' we return here to p4d_to_folded_pud(), which + * will offset the pointer assuming that it points into + * a page-table page. However, the fast GUP path passes us a + * pgd_t allocated on the stack and so we must use the original + * pointer in 'pgdp' to construct the p4d pointer instead of + * using the generic p4d_offset_lockless() implementation. + * + * Note: reusing the original pointer means that we may + * dereference the same (live) page-table entry multiple times. + * This is safe because it is still only loaded once in the + * context of each level and the CPU guarantees same-address + * read-after-read ordering. + */ + return p4d_offset(pgdp, addr); +} +#define p4d_offset_lockless p4d_offset_lockless_folded + #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #define pgd_ERROR(e) \ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 28f665e0975a28cf403ae7a19ce63add56e4d94c..1aa4ecb73429fed92d3863e18e559099cdd913bf 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls index 3cfafd003b2db6868f234bd53240aa6e6f715b0c..0542a718871afa712446f8c674e680e9d187bb09 100644 --- a/arch/arm64/kernel/Makefile.syscalls +++ b/arch/arm64/kernel/Makefile.syscalls @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += -syscall_abis_64 += renameat newstat rlimit memfd_secret +syscall_abis_64 += renameat rlimit memfd_secret syscalltbl = arch/arm64/tools/syscall_%.tbl diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 0c036a9a3c338ae81e0e31626c54948f227e312d..2465f291c7e17cc8e1d02b3a23fb96cf5e71a22c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,7 +27,7 @@ #include -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 617424b73f8c3251aba6a8a1680029dc39ac09d8..f6b6b4507357158c8e3a26881f7ceca1cdd20646 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -434,15 +434,24 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {} }; #endif diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index faf88ec9c48e8a91487329fffb7f9d20fd3b6854..f63ea915d6ad250d56d20ac5e95ec203a641974c 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -7,11 +7,12 @@ */ #include #include +#include #include #include -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry, } aarch64_insn_patch_text_nosync(addr, insn); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a096e2451044d35bad506ecb67edcf3f2d30b87c..b22d28ec80284b1c4f593a4ffc51ed2b7012f7a3 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -355,9 +355,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5e18fbcee9a20cb3605b1421b32343613fde0dfd..f01f0fd7b7feb4a98c09d9184a703f5e76a89ce6 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -467,6 +467,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d63930c82839784f1ec7210ed92ade738806cdc5..d11da6461278f316872be77e136edcf220b8c7e6 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). -ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ +ldflags-y := -shared -soname=linux-vdso.so.1 \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) ifdef CONFIG_LD_ORPHAN_WARN diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index cc4508c604b28961b36b16a1790d97312b5d15bd..25a2cb6317f3592179dded21218e81036a8f67bc 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -98,7 +98,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 -VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += -shared --build-id=sha1 VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 58f09370d17e017614c690000f55e9b3b816d77c..8304eb342be9d99687b16c549a0933caf7060aca 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -19,6 +19,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on AS_HAS_ARMV8_4 select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a6497228c5a8c37d9159a6f99b162b22f18192ca..86a629aaf0a13fe59e28f8a7ad35d7269ec0dfd6 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,9 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ +CFLAGS_sys_regs.o += -Wno-override-init +CFLAGS_handle_exit.o += -Wno-override-init + kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a7ca776b51ec8a1425d457206f35811eb26fa77c..9bef7638342ef748d10277c905091e458bdff2af 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,7 @@ static int kvm_arm_default_max_vcpus(void) /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct + * @type: kvm device type */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { @@ -521,10 +522,10 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) { - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu) && !is_protected_kvm_enabled()) { /* - * Either we're running running an L2 guest, and the API/APK - * bits come from L1's HCR_EL2, or API/APK are both set. + * Either we're running an L2 guest, and the API/APK bits come + * from L1's HCR_EL2, or API/APK are both set. */ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { u64 val; @@ -541,16 +542,10 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) * Save the host keys if there is any chance for the guest * to use pauth, as the entry code will reload the guest * keys in that case. - * Protected mode is the exception to that rule, as the - * entry into the EL2 code eagerly switch back and forth - * between host and hyp keys (and kvm_hyp_ctxt is out of - * reach anyway). */ - if (is_protected_kvm_enabled()) - return; - if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) { struct kvm_cpu_context *ctxt; + ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt); ptrauth_save_keys(ctxt); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f59ccfe11ab9ad473d96c13cb3da4dfc18a043b5..37ff87d782b62bdef9ccd312537079283bf5654d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 782b34b004be3c39a35e597dd8429b12f9a3976b..b43426a493df5a388caa920e259cc8c54d118a1b 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -20,6 +20,8 @@ HOST_EXTRACFLAGS += -I$(objtree)/include lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) +CFLAGS_switch.nvhe.o += -Wno-override-init + hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6af179c6356d666b68e426429572ce6d559bbd82..8f5c56d5b1cdf5c0d65774a8714a79ac95cc709f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -173,9 +173,8 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { /* - * Make sure we handle the exit for workarounds and ptrauth - * before the pKVM handling, as the latter could decide to - * UNDEF. + * Make sure we handle the exit for workarounds before the pKVM + * handling, as the latter could decide to UNDEF. */ return (kvm_hyp_handle_sysreg(vcpu, exit_code) || kvm_handle_pvm_sysreg(vcpu, exit_code)); diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 3b9e5464b5b39ca07633751f2eea412c03d672f9..afc4aed9231ac0518b84131ec0bae4289eefbd1b 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,6 +6,8 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ +CFLAGS_switch.o += -Wno-override-init + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index de789e0f1ae9cb6e1bc7ba0bd7bfe691df0143af..bab27f9d8cc659531291830ec79c13eb62aac4d0 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -786,7 +786,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) if (!WARN_ON(atomic_read(&mmu->refcnt))) kvm_free_stage2_pgd(mmu); } - kfree(kvm->arch.nested_mmus); + kvfree(kvm->arch.nested_mmus); kvm->arch.nested_mmus = NULL; kvm->arch.nested_mmus_size = 0; kvm_uninit_stage2_mmu(kvm); diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bcbc8c986b1d63e2942869d66cd33cbfca5039ac..bc74d06398ef161618ae88d4bc9932ba801e00e4 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -45,7 +45,8 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. */ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && + iter->nr_lpis) { if (iter->lpi_idx < iter->nr_lpis) xa_find_after(&dist->lpi_xa, &iter->intid, VGIC_LPI_MAX_INTID, @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - iter->lpi_idx > iter->nr_lpis; + (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 7f68cf58b978fb11c78b4b0bb6d9c15a0625c45b..41feb858ff9a5e2c83046750a8168d578e2f202a 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -438,14 +438,13 @@ void kvm_vgic_destroy(struct kvm *kvm) unsigned long i; mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) __kvm_vgic_vcpu_destroy(vcpu); - mutex_lock(&kvm->arch.config_lock); - kvm_vgic_dist_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 8c711deb25aa006bc234f59de60cfcc015ed46b7..c314c016659abe6688d54ada383ad2cfb1495e8a 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -9,7 +9,7 @@ #include #include "vgic.h" -/** +/* * vgic_irqfd_set_irq: inject the IRQ corresponding to the * irqchip routing entry * @@ -75,7 +75,8 @@ static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, msi->flags = e->msi.flags; msi->devid = e->msi.devid; } -/** + +/* * kvm_set_msi: inject the MSI corresponding to the * MSI routing entry * @@ -98,7 +99,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return vgic_its_inject_msi(kvm, &msi); } -/** +/* * kvm_arch_set_irq_inatomic: fast-path for irqfd injection */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 40bb43f20bf34794204fb6c4008316b817770e9a..ba945ba78cc7d7e749af5ae932d63a4bfcf6f643 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2040,6 +2040,7 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, * @start_id: the ID of the first entry in the table * (non zero for 2d level tables) * @fn: function to apply on each entry + * @opaque: pointer to opaque data * * Return: < 0 on error, 0 if last element was identified, 1 otherwise * (the last element may not be found on second level tables) @@ -2079,7 +2080,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, return 1; } -/** +/* * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, @@ -2099,6 +2100,8 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, /** * vgic_its_restore_ite - restore an interrupt translation entry + * + * @its: its handle * @event_id: id used for indexing * @ptr: pointer to the ITE entry * @opaque: pointer to the its_device @@ -2231,6 +2234,7 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA + * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, gpa_t ptr, int dte_esz) @@ -2313,7 +2317,7 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, return 1; } -/** +/* * vgic_its_save_device_tables - Save the device table and all ITT * into guest RAM * @@ -2386,7 +2390,7 @@ static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, return ret; } -/** +/* * vgic_its_restore_device_tables - Restore the device table and all ITT * from guest RAM to internal data structs */ @@ -2478,7 +2482,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) return 1; } -/** +/* * vgic_its_save_collection_table - Save the collection table into * guest RAM */ @@ -2518,7 +2522,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_restore_collection_table - reads the collection table * in guest memory and restores the ITS internal state. Requires the * BASER registers to be restored before. @@ -2556,7 +2560,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM * according to v0 ABI */ @@ -2571,7 +2575,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its) return vgic_its_save_collection_table(its); } -/** +/* * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM * to internal data structs according to V0 ABI * diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index ed6e412cd74bacbdb91588c6e637ef6f716353df..3eecdd2f4b8f552665096a2c3d1528c66f468c77 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -370,7 +370,7 @@ static void map_all_vpes(struct kvm *kvm) dist->its_vm.vpes[i]->irq)); } -/** +/* * vgic_v3_save_pending_tables - Save the pending tables into guest RAM * kvm lock and all vcpu lock must be held */ diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f07b3ddff7d442be815ad15f3462d5539c84e76b..974849ea7101c6000159727a7c0077ccead78136 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -313,7 +313,7 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne * with all locks dropped. */ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags) + unsigned long flags) __releases(&irq->irq_lock) { struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 03d356a123771fda53c9aff25e475cbb68079bc6..ba8f790431bd3a8b34745f5679cbd587a2666440 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -186,7 +186,7 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags); + unsigned long flags) __releases(&irq->irq_lock); void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index a4c1dd4741a470741a88ad37951c5b67e11928c1..7ceaa1e0b4bc25992165eac2f32e147fea465b31 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -149,7 +149,7 @@ Res0 63:32 UnsignedEnum 31:28 GIC 0b0000 NI 0b0001 GICv3 - 0b0010 GICv4p1 + 0b0011 GICv4p1 EndEnum UnsignedEnum 27:24 Virt_frac 0b0000 NI @@ -903,7 +903,7 @@ EndEnum UnsignedEnum 27:24 GIC 0b0000 NI 0b0001 IMP - 0b0010 V4P1 + 0b0011 V4P1 EndEnum SignedEnum 23:20 AdvSIMD 0b0000 IMP diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ebdb7156560c7bb17b612182605aac951a5b5b02..70f169210b523fcb837c351abf7ad7e84673648d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -476,7 +476,6 @@ config NR_CPUS config NUMA bool "NUMA Support" select SMP - select ACPI_NUMA if ACPI help Say Y to compile the kernel with NUMA (Non-Uniform Memory Access) support. This option improves performance on systems with more diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index aa44b3fe43dde781fa0f9ef9517edcdc0f0bff89..5da32c00d483fbab845664373b950d45e98982bd 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -34,7 +34,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t clear; - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); pte_val(clear) = (unsigned long)invalid_pte_table; set_pte_at(mm, addr, ptep, clear); @@ -65,7 +65,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t pte, int dirty) { - int changed = !pte_same(*ptep, pte); + int changed = !pte_same(ptep_get(ptep), pte); if (changed) { set_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 92636e82957c7ed58201073c1477c68568421602..da9e93024626c5c96ee92db453a0910472a6c7c4 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -53,13 +53,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (WARN_ON(!pte) || pte_none(*pte)) + if (WARN_ON(!pte) || pte_none(ptep_get(pte))) return false; if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~(_PAGE_VALID | _PAGE_PRESENT))); else - set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) | (_PAGE_VALID | _PAGE_PRESENT))); preempt_disable(); local_flush_tlb_one(addr); diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 44b54965f5b4f888b41d8eab7049f5b55ead0e37..5f0677e03817baacc343c8bb35f3eb6b410d398b 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -26,8 +26,6 @@ #define KVM_MAX_VCPUS 256 #define KVM_MAX_CPUCFG_REGS 21 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 335fb86778e2bc4d7a82c7e57670277a90efc481..43ec61589e6cde88f7120b5052d32e4435c0c13d 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -39,9 +39,9 @@ struct kvm_steal_time { * Hypercall interface for KVM hypervisor * * a0: function identifier - * a1-a6: args + * a1-a5: args * Return value will be placed in a0. - * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + * Up to 5 arguments are passed in a1, a2, a3, a4, a5. */ static __always_inline long kvm_hypercall0(u64 fid) { diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 3fbf1f37c58ecf6ee447adf4aeccd57d7a2bd538..85431f20a14db77f5ba77b78d6ccc7a199bb6eaa 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -106,6 +106,9 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define KFENCE_AREA_START (VMEMMAP_END + 1) #define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) +#define ptep_get(ptep) READ_ONCE(*(ptep)) +#define pmdp_get(pmdp) READ_ONCE(*(pmdp)) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -147,11 +150,6 @@ static inline int p4d_present(p4d_t p4d) return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void p4d_clear(p4d_t *p4dp) -{ - p4d_val(*p4dp) = (unsigned long)invalid_pud_table; -} - static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)p4d_val(p4d); @@ -159,7 +157,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *p4d = p4dval; + WRITE_ONCE(*p4d, p4dval); +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + set_p4d(p4dp, __p4d((unsigned long)invalid_pud_table)); } #define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) @@ -193,17 +196,20 @@ static inline int pud_present(pud_t pud) return pud_val(pud) != (unsigned long)invalid_pmd_table; } -static inline void pud_clear(pud_t *pudp) +static inline pmd_t *pud_pgtable(pud_t pud) { - pud_val(*pudp) = ((unsigned long)invalid_pmd_table); + return (pmd_t *)pud_val(pud); } -static inline pmd_t *pud_pgtable(pud_t pud) +static inline void set_pud(pud_t *pud, pud_t pudval) { - return (pmd_t *)pud_val(pud); + WRITE_ONCE(*pud, pudval); } -#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud((unsigned long)invalid_pmd_table)); +} #define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) @@ -231,12 +237,15 @@ static inline int pmd_present(pmd_t pmd) return pmd_val(pmd) != (unsigned long)invalid_pte_table; } -static inline void pmd_clear(pmd_t *pmdp) +static inline void set_pmd(pmd_t *pmd, pmd_t pmdval) { - pmd_val(*pmdp) = ((unsigned long)invalid_pte_table); + WRITE_ONCE(*pmd, pmdval); } -#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd((unsigned long)invalid_pte_table)); +} #define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) @@ -314,7 +323,8 @@ extern void paging_init(void); static inline void set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + WRITE_ONCE(*ptep, pteval); + if (pte_val(pteval) & _PAGE_GLOBAL) { pte_t *buddy = ptep_buddy(ptep); /* @@ -341,8 +351,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); #else /* !CONFIG_SMP */ - if (pte_none(*buddy)) - pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + if (pte_none(ptep_get(buddy))) + WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ } } @@ -350,7 +360,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* Preserve global status for the pair */ - if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) set_pte(ptep, __pte(_PAGE_GLOBAL)); else set_pte(ptep, __pte(0)); @@ -603,7 +613,7 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t old = *pmdp; + pmd_t old = pmdp_get(pmdp); pmd_clear(pmdp); diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls index 523bb411a3bca559cb5b0e988dd08f662dac89d3..ab7d9baa29152da97932c7e447a183fba265451c 100644 --- a/arch/loongarch/kernel/Makefile.syscalls +++ b/arch/loongarch/kernel/Makefile.syscalls @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -syscall_abis_64 += newstat +# No special ABIs on loongarch so far +syscall_abis_64 += diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 000825406c1f62cdebd32e79714738987d20d5cc..2bf86aeda874c79f58ad6d6ba79477a4eda1811b 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -66,6 +66,12 @@ void __init efi_runtime_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } +bool efi_poweroff_required(void) +{ + return efi_enabled(EFI_RUNTIME_SERVICES) && + (acpi_gbl_reduced_hardware || acpi_no_s5); +} + unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; #if defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 2634a9e8d82cc54889465fe4042a50699ee6c186..28681dfb4b85975b04d2d81960312d929c1e3c82 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -714,19 +714,19 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ - pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; - p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + p4d = p4dp_get(p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; - pud = READ_ONCE(*pud_offset(&p4d, hva)); + pud = pudp_get(pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; - pmd = READ_ONCE(*pmd_offset(&pud, hva)); + pmd = pmdp_get(pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 12222c56cb594829f6b4e1601feb36a19e99e584..e4068906143b33a9d850976692141252de43de17 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -39,11 +39,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { + if (pgd_present(pgdp_get(pgd))) { p4d = p4d_offset(pgd, addr); - if (p4d_present(*p4d)) { + if (p4d_present(p4dp_get(p4d))) { pud = pud_offset(p4d, addr); - if (pud_present(*pud)) + if (pud_present(pudp_get(pud))) pmd = pmd_offset(pud, addr); } } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index bf789d114c2d79a284b5d6e5acb84659c2c5d375..8a87a482c8f44bf085ae792da33342bb35b9b010 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -141,7 +141,7 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next) { - int huge = pmd_val(*pmd) & _PAGE_HUGE; + int huge = pmd_val(pmdp_get(pmd)) & _PAGE_HUGE; if (huge) vmemmap_verify((pte_t *)pmd, node, addr, next); @@ -173,7 +173,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pud) panic("%s: Failed to allocate memory\n", __func__); @@ -184,7 +184,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pmd) panic("%s: Failed to allocate memory\n", __func__); @@ -195,7 +195,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdp_get(pmd))) { pte_t *pte; pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); @@ -216,7 +216,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); ptep = populate_kernel_pte(addr); - if (!pte_none(*ptep)) { + if (!pte_none(ptep_get(ptep))) { pte_ERROR(*ptep); return; } diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index c608adc9984581d0419594a8eb87ae18a3e9ec63..427d6b1aec09e707837831888f55275165127c9b 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -105,7 +105,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) { - if (__pmd_none(early, READ_ONCE(*pmdp))) { + if (__pmd_none(early, pmdp_get(pmdp))) { phys_addr_t pte_phys = early ? __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); if (!early) @@ -118,7 +118,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) { - if (__pud_none(early, READ_ONCE(*pudp))) { + if (__pud_none(early, pudp_get(pudp))) { phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); if (!early) @@ -131,7 +131,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) { - if (__p4d_none(early, READ_ONCE(*p4dp))) { + if (__p4d_none(early, p4dp_get(p4dp))) { phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); if (!early) @@ -154,7 +154,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, : kasan_alloc_zeroed_page(node); next = addr + PAGE_SIZE; set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); + } while (ptep++, addr = next, addr != end && __pte_none(early, ptep_get(ptep))); } static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, @@ -166,7 +166,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, do { next = pmd_addr_end(addr, end); kasan_pte_populate(pmdp, addr, next, node, early); - } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, pmdp_get(pmdp))); } static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index bda018150000e66b906420ea7e3a5f79472ca352..eb6a29b491a72be73f64a472d57fe3c2856bc9c1 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -128,7 +128,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot) void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); flush_tlb_all(); } diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 2738325e98dd0900539d65ba34956ce434bfc139..d20eec742bfaab6fdc0238f1926e98189a25fa5b 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -111,7 +111,7 @@ void gio_device_unregister(struct gio_device *giodev) } EXPORT_SYMBOL_GPL(gio_device_unregister); -static int gio_bus_match(struct device *dev, struct device_driver *drv) +static int gio_bus_match(struct device *dev, const struct device_driver *drv) { struct gio_device *gio_dev = to_gio_device(dev); struct gio_driver *gio_drv = to_gio_driver(drv); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5d650e02cbf4a33cc6bf5b4a1b1d9e106f399d40..b0a2ac3ba91610e4240c04f09f4e52a2600c6636 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -20,6 +20,7 @@ config PARISC select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE select DMA_OPS diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 2a60d7a72f1fa80b96b058eced6c33895693b9b2..a3f0f100f21949f453c18119e61aaead71e01435 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -20,7 +20,16 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#ifdef CONFIG_PA20 +#define ARCH_DMA_MINALIGN 128 +#else +#define ARCH_DMA_MINALIGN 32 +#endif +#define ARCH_KMALLOC_MINALIGN 16 /* ldcw requires 16-byte alignment */ + +#define arch_slab_minalign() ((unsigned)dcache_stride) +#define cache_line_size() dcache_stride +#define dma_get_cache_alignment cache_line_size #define __read_mostly __section(".data..read_mostly") diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c index 979f45d4d1fbeae5a60d92345409d413d352e7e1..06cbcd6fe87b80e0eeb35c8b134fe5b424ef3825 100644 --- a/arch/parisc/net/bpf_jit_core.c +++ b/arch/parisc/net/bpf_jit_core.c @@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) jit_data->header = bpf_jit_binary_alloc(prog_size + extable_size, &jit_data->image, - sizeof(u32), + sizeof(long), bpf_fill_ill_insns); if (!jit_data->header) { prog = orig_prog; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f4e6f2dd04b731ec3eb5154986a8aecd573dd8d0..16bacfe8c7a2ca1f97d403d2889e7135effbd978 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -145,6 +145,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_HOTPLUG_SMT #include +#include #include static inline bool topology_is_primary_thread(unsigned int cpu) @@ -156,6 +157,18 @@ static inline bool topology_smt_thread_allowed(unsigned int cpu) { return cpu_thread_in_core(cpu) < cpu_smt_num_threads; } + +#define topology_is_core_online topology_is_core_online +static inline bool topology_is_core_online(unsigned int cpu) +{ + int i, first_cpu = cpu_first_thread_sibling(cpu); + + for (i = first_cpu; i < first_cpu + threads_per_core; ++i) { + if (cpu_online(i)) + return true; + } + return false; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4bd2f87616baa93a11e4015e01e9c7e986139eeb..943430077375a4e3c95f4ee9c8d38270081d557e 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -959,6 +959,7 @@ void __init setup_arch(char **cmdline_p) mem_topology_setup(); /* Set max_mapnr before paging_init() */ set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 961aadc71de223c53f70e70d114a5b6caf2ac697..5e6c7b5276775fb2b225720d30fbb5521ce6e16d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1984,8 +1984,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; r = -ENXIO; - if (!xive_enabled()) + if (!xive_enabled()) { + fdput(f); break; + } r = -EPERM; dev = kvm_device_from_filp(f.file); diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 9b4a675eb8f877ce313d125ca007f0ed3cde92a1..2978fcbe307eabf43984ce631e7faa1dbeaff9c0 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -73,7 +73,7 @@ void setup_kup(void) #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ - memset(addr, 0, sizeof(void *) << (shift)); \ + memset(addr, 0, sizeof(pgd_t) << (shift)); \ } CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ void pgtable_cache_add(unsigned int shift) { char *name; - unsigned long table_size = sizeof(void *) << shift; + unsigned long table_size = sizeof(pgd_t) << shift; unsigned long align = table_size; /* When batching pgtable pointers for RCU freeing, we store diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab201214b2af406afe4fcb4701cb5f51a..da21cb018984eb84f39e22a280e918dcd955c8ac 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -290,8 +290,6 @@ void __init mem_init(void) swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - kasan_late_init(); memblock_free_all(); diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 517b963e3e6a19806c80369de3c4449d12688605..a0934b516933ea5d32b93de5b2089350dceb8cdf 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -559,9 +559,7 @@ bool __init xive_native_init(void) struct device_node *np; struct resource r; void __iomem *tima; - struct property *prop; u8 max_prio = 7; - const __be32 *p; u32 val, cpu; s64 rc; @@ -592,7 +590,7 @@ bool __init xive_native_init(void) max_prio = val - 1; /* Iterate the EQ sizes and pick one */ - of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) { + of_property_for_each_u32(np, "ibm,xive-eq-sizes", val) { xive_queue_shift = val; if (val == PAGE_SHIFT) break; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index e454192643910075d3e501382453aba8ee0fae08..f2fa985a2c77128c5279a3038011f797611d8589 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -814,7 +814,6 @@ bool __init xive_spapr_init(void) struct device_node *np; struct resource r; void __iomem *tima; - struct property *prop; u8 max_prio; u32 val; u32 len; @@ -866,7 +865,7 @@ bool __init xive_spapr_init(void) } /* Iterate the EQ sizes and pick one */ - of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { + of_property_for_each_u32(np, "ibm,xive-eq-sizes", val) { xive_queue_shift = val; if (val == PAGE_SHIFT) break; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3ceec2ca84fa955e6f031c5ff6c19b36c7ce3779..0f3cd7c3a4360529331d8dd0023c11460b957622 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,7 +13,9 @@ config 32BIT config RISCV def_bool y select ACPI_GENERIC_GSI if ACPI + select ACPI_PPTT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select ACPI_SPCR_TABLE if ACPI select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP @@ -123,6 +125,7 @@ config RISCV select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU @@ -154,7 +157,6 @@ config RISCV select HAVE_KERNEL_UNCOMPRESSED if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KERNEL_ZSTD if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KPROBES if !XIP_KERNEL - select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL # https://github.com/ClangBuiltLinux/linux/issues/1881 select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD @@ -820,6 +822,8 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS endchoice +source "arch/riscv/Kconfig.vendor" + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor new file mode 100644 index 0000000000000000000000000000000000000000..6f1cdd32ed29a0479d253088a479d2c6563df447 --- /dev/null +++ b/arch/riscv/Kconfig.vendor @@ -0,0 +1,19 @@ +menu "Vendor extensions" + +config RISCV_ISA_VENDOR_EXT + bool + +menu "Andes" +config RISCV_ISA_VENDOR_EXT_ANDES + bool "Andes vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here if you want to disable all Andes vendor extension + support. This will cause any Andes vendor extensions that are + requested by hardware probing to be ignored. + + If you don't know what to do here, say Y. +endmenu + +endmenu diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 869c0345b908455ea34bcf7a3e5d92cdb208c63d..4e9e7a28bf9b1dc33948dd3e4cb9894c47649f41 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -18,7 +18,6 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.* loader loader.o loader.lds loader.bin targets := Image Image.* loader loader.o loader.lds loader.bin xipImage ifeq ($(CONFIG_XIP_KERNEL),y) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 3f1f055866afe19ccd2360ad1c46f802714b71fa..0d678325444fccdc23ee7878a11836aa560b5e7b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -7,6 +7,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_MEMCG=y +CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -35,9 +36,6 @@ CONFIG_ARCH_THEAD=y CONFIG_ARCH_VIRT=y CONFIG_ARCH_CANAAN=y CONFIG_SMP=y -CONFIG_HOTPLUG_CPU=y -CONFIG_PM=y -CONFIG_CPU_IDLE=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m @@ -52,13 +50,11 @@ CONFIG_ACPI=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -CONFIG_SPARSEMEM_MANUAL=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_SPARSEMEM_MANUAL=y CONFIG_NET=y CONFIG_PACKET=y -CONFIG_UNIX=y CONFIG_XFRM_USER=m -CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y @@ -102,9 +98,9 @@ CONFIG_NET_SCHED=y CONFIG_NET_CLS_CGROUP=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y +CONFIG_CAN=m CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y -CONFIG_CAN=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -153,8 +149,8 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y @@ -179,7 +175,6 @@ CONFIG_DEVFREQ_THERMAL=y CONFIG_RZG2L_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y -CONFIG_RENESAS_RZG2LWDT=y CONFIG_MFD_AXP20X_I2C=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y @@ -193,11 +188,9 @@ CONFIG_DRM_NOUVEAU=m CONFIG_DRM_SUN4I=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y -CONFIG_SND_SOC_RZ=m CONFIG_SND_DESIGNWARE_I2S=m CONFIG_SND_SOC_STARFIVE=m CONFIG_SND_SOC_JH7110_PWMDAC=m @@ -239,34 +232,31 @@ CONFIG_USB_CONFIGFS_F_FS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SDHCI_OF_DWCMSHC=y +CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_SDHI=y CONFIG_MMC_DW=y CONFIG_MMC_DW_STARFIVE=y -CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m CONFIG_DW_AXI_DMAC=y -CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y -CONFIG_RENESAS_OSTM=y CONFIG_CLK_SOPHGO_CV1800=y CONFIG_SUN8I_DE2_CCU=m +CONFIG_RENESAS_OSTM=y CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y -CONFIG_ARCH_R9A07G043=y +CONFIG_PM_DEVFREQ=y CONFIG_IIO=y -CONFIG_RZG2L_ADC=m -CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_PHY_SUN4I_USB=m CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_PHY_STARFIVE_JH7110_DPHY_RX=m diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index f2708a9494a10f0012ef7ac6068a8615b3efbced..fc1a34faa5f3bc5313bab237eac812503bca7994 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -17,6 +17,7 @@ #include #include #include +#include #define ANDES_AX45MP_MARCHID 0x8000000000008a45UL #define ANDES_AX45MP_MIMPID 0x500UL @@ -65,6 +66,8 @@ void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct al unsigned long archid, unsigned long impid, unsigned int stage) { + BUILD_BUG_ON(ERRATA_ANDES_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + if (stage == RISCV_ALTERNATIVES_BOOT) errata_probe_iocp(stage, archid, impid); diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 716cfedad3a2a442547df98b26a58f937e57caef..cea3b96ade11a6b54e9d7b97a2afdb66b829a417 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -12,6 +12,7 @@ #include #include #include +#include struct errata_info_t { char name[32]; @@ -96,6 +97,8 @@ void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, u32 cpu_apply_errata = 0; u32 tmp; + BUILD_BUG_ON(ERRATA_SIFIVE_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index bf6a0a6318ee307085c3cc04c4056a0b63a000e9..f5120e07c3182642026e8e0c7083e054ba3fd9de 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -18,6 +18,7 @@ #include #include #include +#include #define CSR_TH_SXSTATUS 0x5c0 #define SXSTATUS_MAEE _AC(0x200000, UL) @@ -166,6 +167,8 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, u32 tmp; void *oldptr, *altptr; + BUILD_BUG_ON(ERRATA_THEAD_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + for (alt = begin; alt < end; alt++) { if (alt->vendor_id != THEAD_VENDOR_ID) continue; diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index 7dad0cf9d701f66135257753d0bb047fda59b5d2..e0a1f84404f31cc7fd467d50f9282f54b91860d7 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -61,11 +61,14 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } void acpi_init_rintc_map(void); struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); -u32 get_acpi_id_for_cpu(int cpu); +static inline u32 get_acpi_id_for_cpu(int cpu) +{ + return acpi_cpu_get_madt_rintc(cpu)->uid; +} + int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); -static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, u32 *cboz_size, u32 *cbop_size); #else @@ -87,4 +90,12 @@ static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ACPI_NUMA +int acpi_numa_get_nid(unsigned int cpu); +void acpi_map_cpus_to_nodes(void); +#else +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +static inline void acpi_map_cpus_to_nodes(void) { } +#endif /* CONFIG_ACPI_NUMA */ + #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 880606b0469a83bc97d864da9235ce29b5807970..71af9ecfcfcb2b96c56611179b0a86c3ccfc2c0a 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -170,7 +170,7 @@ legacy: ({ \ typeof(x) x_ = (x); \ __builtin_constant_p(x_) ? \ - (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + ((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ : \ variable_fls(x_); \ }) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 000796c2d0b123bcc5cfa4f9e859ff97cec5a75c..45f9c1171a486a4dc6a4e0c80d64bf80a724b48f 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -33,6 +33,31 @@ extern struct riscv_isainfo hart_isa[NR_CPUS]; void riscv_user_isa_enable(void); +#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ + .name = #_name, \ + .property = #_name, \ + .id = _id, \ + .subset_ext_ids = _subset_exts, \ + .subset_ext_size = _subset_exts_size, \ + .validate = _validate \ +} + +#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL) + +#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \ + _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate) + +/* Used to declare pure "lasso" extension (Zk for instance) */ +#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ + _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ + ARRAY_SIZE(_bundled_exts), NULL) + +/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ +#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ + _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL) +#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ + _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) + #if defined(CONFIG_RISCV_MISALIGNED) bool check_unaligned_access_emulated_all_cpus(void); void unaligned_emulation_finish(void); @@ -79,59 +104,66 @@ extern bool riscv_isa_fallback; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); +#define STANDARD_EXT 0 + bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); #define riscv_isa_extension_available(isa_bitmap, ext) \ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, + const unsigned long ext) { - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_no); return true; l_no: return false; } -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, + const unsigned long ext) { - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_yes); return false; l_yes: return true; } +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +static __always_inline bool riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_likely(STANDARD_EXT, ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); @@ -139,7 +171,10 @@ static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsign static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) { - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_unlikely(STANDARD_EXT, ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b18b202ca141a0907b988580d4dd49fa39fdbe8b..5a0bd27fd11a6d3d3b573c19ebaeb89d23dd8535 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -80,19 +80,18 @@ #define RISCV_ISA_EXT_ZFA 71 #define RISCV_ISA_EXT_ZTSO 72 #define RISCV_ISA_EXT_ZACAS 73 -#define RISCV_ISA_EXT_XANDESPMU 74 -#define RISCV_ISA_EXT_ZVE32X 75 -#define RISCV_ISA_EXT_ZVE32F 76 -#define RISCV_ISA_EXT_ZVE64X 77 -#define RISCV_ISA_EXT_ZVE64F 78 -#define RISCV_ISA_EXT_ZVE64D 79 -#define RISCV_ISA_EXT_ZIMOP 80 -#define RISCV_ISA_EXT_ZCA 81 -#define RISCV_ISA_EXT_ZCB 82 -#define RISCV_ISA_EXT_ZCD 83 -#define RISCV_ISA_EXT_ZCF 84 -#define RISCV_ISA_EXT_ZCMOP 85 -#define RISCV_ISA_EXT_ZAWRS 86 +#define RISCV_ISA_EXT_ZVE32X 74 +#define RISCV_ISA_EXT_ZVE32F 75 +#define RISCV_ISA_EXT_ZVE64X 76 +#define RISCV_ISA_EXT_ZVE64F 77 +#define RISCV_ISA_EXT_ZVE64D 78 +#define RISCV_ISA_EXT_ZIMOP 79 +#define RISCV_ISA_EXT_ZCA 80 +#define RISCV_ISA_EXT_ZCB 81 +#define RISCV_ISA_EXT_ZCD 82 +#define RISCV_ISA_EXT_ZCF 83 +#define RISCV_ISA_EXT_ZCMOP 84 +#define RISCV_ISA_EXT_ZAWRS 85 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 150a9877b0af1f0bf79be13c697961ab7ce03d60..ffb9484531af7e4166ef970cb7c79f04176a2823 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 7 +#define RISCV_HWPROBE_MAX_KEY 9 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 235fd45d998d97b7815c203c9e9a6109796fe3c6..7ede2111c5917a31d7265a457a173be2dc339afa 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -37,7 +37,7 @@ * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) -#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) +#define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL) #else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* CONFIG_64BIT */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 5d473343634b9d3af3c1f1872da25e6e60f77162..fca5c6be2b8112ff6962a51ef0f89794a332f1a4 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -10,6 +10,7 @@ #include #include +#include /* thread information allocation */ #define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER diff --git a/arch/riscv/include/asm/vendor_extensions.h b/arch/riscv/include/asm/vendor_extensions.h new file mode 100644 index 0000000000000000000000000000000000000000..7437304a71b9111c80f5862aa5df6e58d86f73e0 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_VENDOR_EXTENSIONS_H +#define _ASM_VENDOR_EXTENSIONS_H + +#include + +#include +#include + +/* + * The extension keys of each vendor must be strictly less than this value. + */ +#define RISCV_ISA_VENDOR_EXT_MAX 32 + +struct riscv_isavendorinfo { + DECLARE_BITMAP(isa, RISCV_ISA_VENDOR_EXT_MAX); +}; + +struct riscv_isa_vendor_ext_data_list { + bool is_initialized; + const size_t ext_data_count; + const struct riscv_isa_ext_data *ext_data; + struct riscv_isavendorinfo per_hart_isa_bitmap[NR_CPUS]; + struct riscv_isavendorinfo all_harts_isa_bitmap; +}; + +extern struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[]; + +extern const size_t riscv_isa_vendor_ext_list_size; + +/* + * The alternatives need some way of distinguishing between vendor extensions + * and errata. Incrementing all of the vendor extension keys so they are at + * least 0x8000 accomplishes that. + */ +#define RISCV_VENDOR_EXT_ALTERNATIVES_BASE 0x8000 + +#define VENDOR_EXT_ALL_CPUS -1 + +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit); +#define riscv_cpu_isa_vendor_extension_available(cpu, vendor, ext) \ + __riscv_isa_vendor_extension_available(cpu, vendor, RISCV_ISA_VENDOR_EXT_##ext) +#define riscv_isa_vendor_extension_available(vendor, ext) \ + __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, \ + RISCV_ISA_VENDOR_EXT_##ext) + +static __always_inline bool riscv_has_vendor_extension_likely(const unsigned long vendor, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(vendor, + ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + + return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext); +} + +static __always_inline bool riscv_has_vendor_extension_unlikely(const unsigned long vendor, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(vendor, + ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + + return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext); +} + +static __always_inline bool riscv_cpu_has_vendor_extension_likely(const unsigned long vendor, + int cpu, const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_likely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + return true; + + return __riscv_isa_vendor_extension_available(cpu, vendor, ext); +} + +static __always_inline bool riscv_cpu_has_vendor_extension_unlikely(const unsigned long vendor, + int cpu, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_unlikely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + return true; + + return __riscv_isa_vendor_extension_available(cpu, vendor, ext); +} + +#endif /* _ASM_VENDOR_EXTENSIONS_H */ diff --git a/arch/riscv/include/asm/vendor_extensions/andes.h b/arch/riscv/include/asm/vendor_extensions/andes.h new file mode 100644 index 0000000000000000000000000000000000000000..7bb2fc43438f7a45dc3369fbf188fbf12c227642 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/andes.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H + +#include + +#include + +#define RISCV_ISA_VENDOR_EXT_XANDESPMU 0 + +/* + * Extension keys should be strictly less than max. + * It is safe to increment this when necessary. + */ +#define RISCV_ISA_VENDOR_EXT_MAX_ANDES 32 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes; + +#endif diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 8b8f6ac0eae28686f1aec432a307869c641b9386..1e153cda57db85b99d40f4fcc67aa69651a0f856 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -81,6 +81,13 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 +#define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 +#define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED 1 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 5b243d46f4b17477a70bbb809a580d60453d8f47..06d407f1b30bc2028ffe37df3b64fd9c5a9e17e4 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -58,6 +58,8 @@ obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o +obj-y += vendor_extensions.o +obj-y += vendor_extensions/ obj-y += probes/ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ @@ -110,3 +112,4 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls index 52087a023b3df313709502f4ee3e046578c4b3a8..9668fd1faf60e828ed2786c2ee84739ac1f153fc 100644 --- a/arch/riscv/kernel/Makefile.syscalls +++ b/arch/riscv/kernel/Makefile.syscalls @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += riscv memfd_secret -syscall_abis_64 += riscv newstat rlimit memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index e619edc8b0cc972c1a83809c9f0ebeb10c3331a6..ba957aaca5cbb8ab3e4bdc832fcccd87d6ea7a5d 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void) if (param_acpi_off || (!param_acpi_on && !param_acpi_force && efi.acpi20 == EFI_INVALID_TABLE_ADDR)) - return; + goto done; /* * ACPI is disabled at this point. Enable it in order to parse @@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void) if (!param_acpi_force) disable_acpi(); } + +done: + if (acpi_disabled) { + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); + } else { + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + } } static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) @@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return &cpu_madt_rintc[cpu]; } -u32 get_acpi_id_for_cpu(int cpu) -{ - return acpi_cpu_get_madt_rintc(cpu)->uid; -} - /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c new file mode 100644 index 0000000000000000000000000000000000000000..ff95aeebee3ebcd8e2b2eba42ec2e225932d6060 --- /dev/null +++ b/arch/riscv/kernel/acpi_numa.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI 6.6 based NUMA setup for RISCV + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo + * Copyright (C) 2024 Intel Corporation. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; + +int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_rintc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, + cpuid_to_hartid_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + int i; + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} + +/* Callback for Proximity Domain -> logical node ID mapping */ +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return; + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + node_set(node, numa_nodes_parsed); +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 09e9b88110d1519b98f57be22badeaa2e8802a93..d6c108c50cba92e0eadb1843f71215e3f701fa19 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -64,7 +65,6 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) } static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, enum cache_type type, unsigned int level) { this_leaf->level = level; @@ -79,12 +79,33 @@ int populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; + if (!acpi_disabled) { + int ret, fw_levels, split_levels; + + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels); + if (ret) + return ret; + + BUG_ON((split_levels > fw_levels) || + (split_levels + fw_levels > this_cpu_ci->num_leaves)); + + for (; level <= this_cpu_ci->num_levels; level++) { + if (level <= split_levels) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + } + } + return 0; + } + if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; while ((np = of_find_next_cache_node(np))) { @@ -97,11 +118,11 @@ int populate_cache_leaves(unsigned int cpu) if (level <= levels) break; if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c1f3655238fd2b906db466295d5c3274c67d916c..f6b13e9f5e6cb65649779786f36417a2691f8e40 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -16,6 +16,7 @@ #include #include #include +#include bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { @@ -235,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) +#define ALL_CPUS -1 + +static void print_vendor_isa(struct seq_file *f, int cpu) +{ + struct riscv_isavendorinfo *vendor_bitmap; + struct riscv_isa_vendor_ext_data_list *ext_list; + const struct riscv_isa_ext_data *ext_data; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + ext_list = riscv_isa_vendor_ext_list[i]; + ext_data = riscv_isa_vendor_ext_list[i]->ext_data; + + if (cpu == ALL_CPUS) + vendor_bitmap = &ext_list->all_harts_isa_bitmap; + else + vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id)) + continue; + + seq_printf(f, "_%s", ext_data[j].name); + } + } +} + +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu) { if (IS_ENABLED(CONFIG_32BIT)) @@ -254,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) seq_printf(f, "%s", riscv_isa_ext[i].name); } + print_vendor_isa(f, cpu); + seq_puts(f, "\n"); } @@ -316,7 +345,7 @@ static int c_show(struct seq_file *m, void *v) * line. */ seq_puts(m, "isa\t\t: "); - print_isa(m, NULL); + print_isa(m, NULL, ALL_CPUS); print_mmu(m); if (acpi_disabled) { @@ -338,7 +367,7 @@ static int c_show(struct seq_file *m, void *v) * additional extensions not present across all harts. */ seq_puts(m, "hart isa\t: "); - print_isa(m, hart_isa[cpu_id].isa); + print_isa(m, hart_isa[cpu_id].isa, cpu_id); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 0366dc3baf3388daec95ed1c7e8adfc84ad3d3eb..b427188b28fc16defce7c05ae178b8b9de346d8f 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include #include #include +#include #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -100,31 +101,6 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } -#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ - .name = #_name, \ - .property = #_name, \ - .id = _id, \ - .subset_ext_ids = _subset_exts, \ - .subset_ext_size = _subset_exts_size, \ - .validate = _validate \ -} - -#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL) - -#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \ - _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate) - -/* Used to declare pure "lasso" extension (Zk for instance) */ -#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ - _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ - ARRAY_SIZE(_bundled_exts), NULL) - -/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ -#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL) -#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) - static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -405,7 +381,6 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), - __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); @@ -457,28 +432,26 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { ext = riscv_get_isa_ext_data(bit); - if (!ext) - continue; - if (ext->validate) { + if (ext && ext->validate) { ret = ext->validate(ext, resolved_isa); if (ret == -EPROBE_DEFER) { loop = true; continue; } else if (ret) { /* Disable the extension entirely */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); continue; } } - set_bit(ext->id, resolved_isa); + set_bit(bit, resolved_isa); /* No need to keep it in source isa now that it is enabled */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); /* Single letter extensions get set in hwcap */ - if (ext->id < RISCV_ISA_EXT_BASE) - *this_hwcap |= isa2hwcap[ext->id]; + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); } @@ -512,6 +485,21 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap bool ext_err = false; switch (*ext) { + case 'x': + case 'X': + if (acpi_disabled) + pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead."); + /* + * To skip an extension, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + for (; *isa && *isa != '_'; ++isa) + ; + ext_err = true; + break; case 's': /* * Workaround for invalid single-letter 's' & 'u' (QEMU). @@ -527,8 +515,6 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap } fallthrough; case 'S': - case 'x': - case 'X': case 'z': case 'Z': /* @@ -728,6 +714,61 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) acpi_put_table((struct acpi_table_header *)rhct); } +static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + const struct riscv_isa_ext_data ext = ext_list->ext_data[j]; + struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu]; + + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext.property) < 0) + continue; + + /* + * Assume that subset extensions are all members of the + * same vendor. + */ + if (ext.subset_ext_size) + for (int k = 0; k < ext.subset_ext_size; k++) + set_bit(ext.subset_ext_ids[k], isavendorinfo->isa); + + set_bit(ext.id, isavendorinfo->isa); + } + } +} + +/* + * Populate all_harts_isa_bitmap for each vendor with all of the extensions that + * are shared across CPUs for that vendor. + */ +static void __init riscv_fill_vendor_ext_list(int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + if (!ext_list->is_initialized) { + bitmap_copy(ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + ext_list->is_initialized = true; + } else { + bitmap_and(ext_list->all_harts_isa_bitmap.isa, + ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + } + } +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; @@ -760,6 +801,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) } riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + riscv_fill_cpu_vendor_ext(cpu_node, cpu); of_node_put(cpu_node); @@ -776,6 +818,8 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); + + riscv_fill_vendor_ext_list(cpu); } if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) @@ -918,28 +962,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, { struct alt_entry *alt; void *oldptr, *altptr; - u16 id, value; + u16 id, value, vendor; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; for (alt = begin; alt < end; alt++) { - if (alt->vendor_id != 0) - continue; - id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id); - if (id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", id); - continue; - } + /* + * Any alternative with a patch_id that is less than + * RISCV_ISA_EXT_MAX is interpreted as a standard extension. + * + * Any alternative with patch_id that is greater than or equal + * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a + * vendor extension. + */ + if (id < RISCV_ISA_EXT_MAX) { + /* + * This patch should be treated as errata so skip + * processing here. + */ + if (alt->vendor_id != 0) + continue; - if (!__riscv_isa_extension_available(NULL, id)) - continue; + if (!__riscv_isa_extension_available(NULL, id)) + continue; - value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); - if (!riscv_cpufeature_patch_check(id, value)) + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) + continue; + } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) { + if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, + id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + continue; + } else { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; + } oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 68a24cf9481afe0649e671bd764fa688307c23bf..ac2e908d4418d0c4a03787981b02360a1e212312 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -88,7 +88,6 @@ SYM_CODE_START(handle_exception) call riscv_v_context_nesting_start #endif move a0, sp /* pt_regs */ - la ra, ret_from_exception /* * MSB of cause differentiates between @@ -97,7 +96,8 @@ SYM_CODE_START(handle_exception) bge s4, zero, 1f /* Handle interrupts */ - tail do_irq + call do_irq + j ret_from_exception 1: /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR @@ -105,11 +105,14 @@ SYM_CODE_START(handle_exception) la t2, excp_vect_table_end add t0, t1, t0 /* Check if exception code lies within bounds */ - bgeu t0, t2, 1f - REG_L t0, 0(t0) - jr t0 -1: - tail do_trap_unknown + bgeu t0, t2, 3f + REG_L t1, 0(t0) +2: jalr t1 + j ret_from_exception +3: + + la t1, do_trap_unknown + j 2b SYM_CODE_END(handle_exception) ASM_NOKPROBE(handle_exception) @@ -130,6 +133,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase_on_task_stack +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51fde6940303514732e16ba72dd574d..34ef522f07a8c2179b5643b07c82f22284a1907b 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 8265ff497977cffb5559f25a2bd6adcce2eb1a60..d2129f2c61b86bc3137da40cd833e23080bcd342 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c deleted file mode 100644 index a69dfa610aa85724e01858cc320a55bd46298bce..0000000000000000000000000000000000000000 --- a/arch/riscv/kernel/probes/ftrace.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include - -/* Ftrace callback handler for kprobes -- called under preepmt disabled */ -void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct kprobe *p; - struct pt_regs *regs; - struct kprobe_ctlblk *kcb; - int bit; - - if (unlikely(kprobe_ftrace_disabled)) - return; - - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - return; - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto out; - - regs = ftrace_get_regs(fregs); - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - unsigned long orig_ip = instruction_pointer(regs); - - instruction_pointer_set(regs, ip); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) { - /* - * Emulate singlestep (and also recover regs->pc) - * as if there is a nop - */ - instruction_pointer_set(regs, - (unsigned long)p->addr + MCOUNT_INSN_SIZE); - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - instruction_pointer_set(regs, orig_ip); - } - - /* - * If pre_handler returns !0, it changes regs->pc. We have to - * skip emulating post_handler. - */ - __this_cpu_write(current_kprobe, NULL); - } -out: - ftrace_test_recursion_unlock(bit); -} -NOKPROBE_SYMBOL(kprobe_ftrace_handler); - -int arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.api.insn = NULL; - return 0; -} diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index 1026e22955ccc3a036e33c10bc1dd1324fce3667..0cc5559c08d8ff2c2e8a74bb9bbecfe8f383c104 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -71,7 +71,7 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4f73c0ae44b25d443a64eb9ae75a01d7372d4fca..a2cde65b69e950d929e3f9a5165d5f72d307e850 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -281,8 +281,10 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - if (!acpi_disabled) + if (!acpi_disabled) { acpi_init_rintc_map(); + acpi_map_cpus_to_nodes(); + } riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5a2edd7f027e5d12e682349c3f1b54b51cd3b735..dcd282419456137b81206cfdcf2676999550ab9f 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) datap = state + 1; /* datap is designed to be 16 byte aligned for better performance */ - WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + WARN_ON(!IS_ALIGNED((unsigned long)datap, 16)); get_cpu_vector_context(); riscv_v_vstate_save(¤t->thread.vstate, regs); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 19baf0d574d359bdbdfaeaef91c8dd7a6398e685..0f8f1c95ac38c9d028fb9a1f7e9172141a3f80c1 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -96,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = true; - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); return 0; } @@ -106,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un } cpuid_to_hartid_map(cpu_count) = hart; - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); cpu_count++; return 0; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 10e311b2759d39d4480f1c3274c313c01b7efcb4..c6d5de22463f9d54a85675f1fbb3b63ba4a47619 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,7 +16,7 @@ #ifdef CONFIG_FRAME_POINTER -extern asmlinkage void ret_from_exception(void); +extern asmlinkage void handle_exception(void); static inline int fp_is_valid(unsigned long fp, unsigned long sp) { @@ -71,7 +71,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)ret_from_exception) { + if (pc == (unsigned long)handle_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 685594769535c7a9c957ee498383fd7d0f7fd5eb..cea0ca2bf2a25ecc671e31b141e84c6d1977da25 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -93,44 +94,45 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZACAS); + EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); - EXT_KEY(ZBS); - EXT_KEY(ZICBOZ); EXT_KEY(ZBC); - EXT_KEY(ZBKB); EXT_KEY(ZBKC); EXT_KEY(ZBKX); + EXT_KEY(ZBS); + EXT_KEY(ZCA); + EXT_KEY(ZCB); + EXT_KEY(ZCMOP); + EXT_KEY(ZICBOZ); + EXT_KEY(ZICOND); + EXT_KEY(ZIHINTNTL); + EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); EXT_KEY(ZKNH); EXT_KEY(ZKSED); EXT_KEY(ZKSH); EXT_KEY(ZKT); - EXT_KEY(ZIHINTNTL); EXT_KEY(ZTSO); - EXT_KEY(ZACAS); - EXT_KEY(ZICOND); - EXT_KEY(ZIHINTPAUSE); - EXT_KEY(ZIMOP); - EXT_KEY(ZCA); - EXT_KEY(ZCB); - EXT_KEY(ZCMOP); - EXT_KEY(ZAWRS); /* * All the following extensions must depend on the kernel * support of V. */ if (has_vector()) { - EXT_KEY(ZVE32X); - EXT_KEY(ZVE32F); - EXT_KEY(ZVE64X); - EXT_KEY(ZVE64F); - EXT_KEY(ZVE64D); EXT_KEY(ZVBB); EXT_KEY(ZVBC); + EXT_KEY(ZVE32F); + EXT_KEY(ZVE32X); + EXT_KEY(ZVE64D); + EXT_KEY(ZVE64F); + EXT_KEY(ZVE64X); + EXT_KEY(ZVFH); + EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); EXT_KEY(ZVKG); EXT_KEY(ZVKNED); @@ -139,16 +141,14 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVKSED); EXT_KEY(ZVKSH); EXT_KEY(ZVKT); - EXT_KEY(ZVFH); - EXT_KEY(ZVFHMIN); } if (has_fpu()) { - EXT_KEY(ZFH); - EXT_KEY(ZFHMIN); - EXT_KEY(ZFA); EXT_KEY(ZCD); EXT_KEY(ZCF); + EXT_KEY(ZFA); + EXT_KEY(ZFH); + EXT_KEY(ZFHMIN); } #undef EXT_KEY } @@ -178,13 +178,13 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } @@ -192,12 +192,12 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) static u64 hwprobe_misaligned(const struct cpumask *cpus) { if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) - return RISCV_HWPROBE_MISALIGNED_FAST; + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) - return RISCV_HWPROBE_MISALIGNED_EMULATED; + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; - return RISCV_HWPROBE_MISALIGNED_SLOW; + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; } #endif @@ -225,6 +225,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; @@ -237,6 +238,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = user_max_virt_addr(); break; + case RISCV_HWPROBE_KEY_TIME_CSR_FREQ: + pair->value = riscv_timebase; + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee858f3abf7a28c647ad1146410da0..51ebfd23e0076447518081d137102a9a11ff2e45 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index b62d5a2f4541e754cc7ea322e38d42b4dab65b90..192cd5603e95f9b92211cb616b98df7c6bc602d9 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -338,7 +338,7 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; #endif if (!unaligned_enabled) @@ -532,13 +532,13 @@ static bool check_unaligned_access_emulated(int cpu) unsigned long tmp_var, tmp_val; bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index a9a6bcb02acf111cee35a274716a1c78cbdf5886..160628a2116de4a5e4135e4bba25fef35141b747 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -34,9 +34,9 @@ static int check_unaligned_access(void *param) struct page *page = param; void *dst; void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) return 0; /* Make an unaligned destination buffer. */ @@ -95,14 +95,14 @@ static int check_unaligned_access(void *param) } if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; ratio = div_u64((byte_cycles * 100), word_cycles); pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", cpu, ratio / 100, ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; @@ -110,7 +110,7 @@ static int check_unaligned_access(void *param) * Set the value of fast_misaligned_access of a CPU. These operations * are atomic to avoid race conditions. */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) cpumask_set_cpu(cpu, &fast_misaligned_access); else cpumask_clear_cpu(cpu, &fast_misaligned_access); @@ -188,7 +188,7 @@ static int riscv_online_cpu(unsigned int cpu) static struct page *buf; /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c new file mode 100644 index 0000000000000000000000000000000000000000..a8126d1183412fced15572d7984c9e8d1b061999 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos, Inc + */ + +#include +#include +#include + +#include +#include + +struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + &riscv_isa_vendor_ext_list_andes, +#endif +}; + +const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); + +/** + * __riscv_isa_vendor_extension_available() - Check whether given vendor + * extension is available or not. + * + * @cpu: check if extension is available on this cpu + * @vendor: vendor that the extension is a member of + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: When cpu is -1, will check if extension is available on all cpus + */ +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit) +{ + struct riscv_isavendorinfo *bmap; + struct riscv_isavendorinfo *cpu_bmap; + + switch (vendor) { + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + case ANDES_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; + break; + #endif + default: + return false; + } + + if (cpu != -1) + bmap = &cpu_bmap[cpu]; + + if (bit >= RISCV_ISA_VENDOR_EXT_MAX) + return false; + + return test_bit(bit, bmap->isa) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6a61aed944f17e9cf3a02a0bb3358e5974be42e7 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c new file mode 100644 index 0000000000000000000000000000000000000000..ec688c88456ad397a4538f43c13d0f742f65c231 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/andes.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include +#include + +/* All Andes vendor extensions supported in Linux */ +const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = { + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes), + .ext_data = riscv_isa_vendor_ext_andes, +}; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5224f373380225a2dfdecd40bfc01ce8f5d13bc0..a9f2b4af8f3f18442117f79855cfe89b39ce4d7e 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -61,26 +61,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bfa2dea95354ff85a42f7062903b5757e5982db7..eb0649a61b4c1f3249967f893cf32cfdbadfb2d5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -234,8 +234,6 @@ static void __init setup_bootmem(void) */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); - /* * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. @@ -250,6 +248,16 @@ static void __init setup_bootmem(void) if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; + /* + * The size of the linear page mapping may restrict the amount of + * usable RAM. + */ + if (IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + } + /* * Reserve physical address space that would be mapped to virtual * addresses greater than (void *)(-PAGE_SIZE) because: @@ -266,6 +274,7 @@ static void __init setup_bootmem(void) memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); } + phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); @@ -918,7 +927,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; + end_va = kernel_map.virt_addr + kernel_map.size; for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), @@ -1087,7 +1096,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; #else @@ -1284,8 +1293,6 @@ static void __init create_linear_mapping_page_table(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); - if (end >= __pa(PAGE_OFFSET) + memory_limit) - end = __pa(PAGE_OFFSET) + memory_limit; create_linear_mapping_range(start, end, 0, NULL); } diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 5bcf3af903daa2f9fb2aaf1e57d79121bfcda988..0e6ca6d5ae4b418f1c4d99906a5ab57f10485a00 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,6 +7,7 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ +#include #include .text @@ -34,6 +35,7 @@ SYM_CODE_END(purgatory_start) .data +.align LGREG SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 59e0d861e26ffcbda3ea32d0c538e97f9fba204a..a822f952f64a9c6cafadd3e8b96f905edfcc5829 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -799,17 +799,6 @@ config HAVE_PNETID menu "Virtualization" -config PROTECTED_VIRTUALIZATION_GUEST - def_bool n - prompt "Protected virtualization guest support" - help - Select this option, if you want to be able to run this - kernel as a protected virtualization KVM guest. - Protected virtualization capable machines have a mini hypervisor - located at machine level (an ultravisor). With help of the - Ultravisor, KVM will be able to run "protected" VMs, special - VMs whose memory and management data are unavailable to KVM. - config PFAULT def_bool y prompt "Pseudo page fault support" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e7658997452b1ac82f98e2016e609822ffe9a0a2..4f476884d340442a995d85d2a5ff18dfd87fdf2c 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -39,8 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c new file mode 100644 index 0000000000000000000000000000000000000000..abc08d2c873d27d2eb09724f2bec21f31120d34a --- /dev/null +++ b/arch/s390/boot/alternative.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../kernel/alternative.c" diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 18027fdc92b0c9b4b12170f28ac5ee4bd64ff03a..83e2ce050b6cef93df150e098a24607211268307 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -30,6 +30,8 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; + unsigned long alt_instructions; + unsigned long alt_instructions_end; #ifdef CONFIG_KASAN unsigned long kasan_early_shadow_page_off; unsigned long kasan_early_shadow_pte_off; @@ -89,8 +91,10 @@ extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; + #define vmlinux _vmlinux_info +#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore)) #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) #define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset)) #define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index a21f301acd2962b23677cce09a269746637e50cb..1773b72a6a7bc3627e76f0ee4acd51dacc9b7621 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -310,5 +311,7 @@ void parse_boot_command_line(void) prot_virt_host = 1; } #endif + if (!strcmp(param, "relocate_lowcore") && test_facility(193)) + relocate_lowcore = 1; } } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index c59014945af0507f3b6b3c2888da6bc900edffc3..ce232552bc1c38684ad8036203041ec9874eb8af 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); @@ -376,6 +377,8 @@ static void kaslr_adjust_vmlinux_info(long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; + vmlinux.alt_instructions += offset; + vmlinux.alt_instructions_end += offset; #ifdef CONFIG_KASAN vmlinux.kasan_early_shadow_page_off += offset; vmlinux.kasan_early_shadow_pte_off += offset; @@ -478,8 +481,12 @@ void startup_kernel(void) * before the kernel started. Therefore, in case the two sections * overlap there is no risk of corrupting any data. */ - if (kaslr_enabled()) - amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); + if (kaslr_enabled()) { + unsigned long amode31_min; + + amode31_min = (unsigned long)_decompressor_end; + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); + } if (!amode31_lma) amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); @@ -503,6 +510,9 @@ void startup_kernel(void) kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); copy_bootdata(); + __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, + (struct alt_instr *)_vmlinux_info.alt_instructions_end, + ALT_CTX_EARLY); /* * Save KASLR offset for early dumps, before vmcore_info is set. diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 1e66d2cbb0965a145730d08bf70e79d48ba1e1a9..318e6ba95bfd27b266b2c720392fa8eaf0b045af 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -8,12 +8,8 @@ #include "uv.h" /* will be used in arch/s390/kernel/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); -#endif -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); -#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) @@ -53,14 +49,11 @@ void uv_query_info(void) uv_info.max_secrets = uvcb.max_secrets; } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; -#endif } -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit) { if (is_prot_virt_host() && uv_info.max_sec_stor_addr) @@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void) { prot_virt_host = is_prot_virt_host_capable(); } -#endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h index 0f3070856f8dca909ee500b1b04100bc4fedf31e..da4a4a8d48e0aa7386657894542b2f2d6eda98cc 100644 --- a/arch/s390/boot/uv.h +++ b/arch/s390/boot/uv.h @@ -2,21 +2,8 @@ #ifndef BOOT_UV_H #define BOOT_UV_H -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit); void sanitize_prot_virt_host(void); -#else -static inline unsigned long adjust_to_uv_max(unsigned long limit) -{ - return limit; -} -static inline void sanitize_prot_virt_host(void) {} -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) void uv_query_info(void); -#else -static inline void uv_query_info(void) {} -#endif #endif /* BOOT_UV_H */ diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index a255ca189aaad959aa5afe5b11bbcf72183671de..2847cc059ab7aca7a7d89a2e023d300a85b718ea 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); enum populate_mode { POPULATE_NONE, POPULATE_DIRECT, + POPULATE_LOWCORE, POPULATE_ABS_LOWCORE, POPULATE_IDENTITY, POPULATE_KERNEL, @@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return -1; case POPULATE_DIRECT: return addr; + case POPULATE_LOWCORE: + return __lowcore_pa(addr); case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); case POPULATE_KERNEL: @@ -418,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit) { + unsigned long lowcore_address = 0; unsigned long start, end; unsigned long asce_type; unsigned long asce_bits; @@ -455,12 +459,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); + if (relocate_lowcore) + lowcore_address = LOWCORE_ALT_ADDRESS; + /* * To allow prefixing the lowcore must be mapped with 4KB pages. * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); + pgtable_populate(lowcore_address, + lowcore_address + sizeof(struct lowcore), + POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f3602414a9614f9d0aa7f1b8f2377c69a8181098..ea63a7342f5ff278a346425f84fb112951354f25 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -55,7 +55,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d0d8925fdf09f091c2a269bd7c3d2dd31d9ca7db..d8b28ff8ff451e91b79b9caeca3c1e6977e44be5 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -53,7 +53,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index 6f264b79e37794736ab5eb10c5971fe5fa1c5d7b..d20df8c923fc70fa2a59265ae28474503ea85b2c 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H +#include #include #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } +extern int __bootdata_preserved(relocate_lowcore); + +static inline int have_relocated_lowcore(void) +{ + return relocate_lowcore; +} + #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h deleted file mode 100644 index 608f6287ca9ccab697a9c75452875539cd79a8fd..0000000000000000000000000000000000000000 --- a/arch/s390/include/asm/alternative-asm.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_ALTERNATIVE_ASM_H -#define _ASM_S390_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature - .long \orig_start - . - .long \alt_start - . - .word \feature - .byte \orig_end - \orig_start - .org . - ( \orig_end - \orig_start ) & 1 - .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) - .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE oldinstr, newinstr, feature - .pushsection .altinstr_replacement,"ax" -770: \newinstr -771: .popsection -772: \oldinstr -773: .pushsection .altinstructions,"a" - alt_entry 772b, 773b, 770b, 771b, \feature - .popsection -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 - .pushsection .altinstr_replacement,"ax" -770: \newinstr1 -771: \newinstr2 -772: .popsection -773: \oldinstr -774: .pushsection .altinstructions,"a" - alt_entry 773b, 774b, 770b, 771b,\feature1 - alt_entry 773b, 774b, 771b, 772b,\feature2 - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_S390_ALTERNATIVE_ASM_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index dd93b92c3ab69ad285a8a87407f814fa3d60f11e..de980c938a3e148f15044fff9cd4e943ccad1bb1 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -2,6 +2,58 @@ #ifndef _ASM_S390_ALTERNATIVE_H #define _ASM_S390_ALTERNATIVE_H +/* + * Each alternative comes with a 32 bit feature field: + * union { + * u32 feature; + * struct { + * u32 ctx : 4; + * u32 type : 8; + * u32 data : 20; + * }; + * } + * + * @ctx is a bitfield, where only one bit must be set. Each bit defines + * in which context an alternative is supposed to be applied to the + * kernel image: + * + * - from the decompressor before the kernel itself is executed + * - from early kernel code from within the kernel + * + * @type is a number which defines the type and with that the type + * specific alternative patching. + * + * @data is additional type specific information which defines if an + * alternative should be applied. + */ + +#define ALT_CTX_EARLY 1 +#define ALT_CTX_LATE 2 +#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) + +#define ALT_TYPE_FACILITY 0 +#define ALT_TYPE_SPEC 1 +#define ALT_TYPE_LOWCORE 2 + +#define ALT_DATA_SHIFT 0 +#define ALT_TYPE_SHIFT 20 +#define ALT_CTX_SHIFT 28 + +#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) + #ifndef __ASSEMBLY__ #include @@ -11,12 +63,30 @@ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 facility; /* facility bit set for replacement */ + union { + u32 feature; /* feature required for replacement */ + struct { + u32 ctx : 4; /* context */ + u32 type : 8; /* type of alternative */ + u32 data : 20; /* patching information */ + }; + }; u8 instrlen; /* length of original instruction */ } __packed; -void apply_alternative_instructions(void); -void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx); + +static inline void apply_alternative_instructions(void) +{ + __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE); +} + +static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + __apply_alternatives(start, end, ALT_CTX_ALL); +} /* * +---------------------------------+ @@ -48,10 +118,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define OLDINSTR(oldinstr) \ "661:\n\t" oldinstr "\n662:\n" -#define ALTINSTR_ENTRY(facility, num) \ +#define ALTINSTR_ENTRY(feature, num) \ "\t.long 661b - .\n" /* old instruction */ \ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ - "\t.word " __stringify(facility) "\n" /* facility bit */ \ + "\t.long " __stringify(feature) "\n" /* feature */ \ "\t.byte " oldinstr_len "\n" /* instruction len */ \ "\t.org . - (" oldinstr_len ") & 1\n" \ "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \ @@ -61,24 +131,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" /* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ +#define ALTERNATIVE(oldinstr, altinstr, feature) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr, 1) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility, 1) \ + ALTINSTR_ENTRY(feature, 1) \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\ +#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr1, 1) \ ALTINSTR_REPLACEMENT(altinstr2, 2) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility1, 1) \ - ALTINSTR_ENTRY(facility2, 2) \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ ".popsection\n" /* @@ -93,12 +163,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, altinstr, facility) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") +#define alternative(oldinstr, altinstr, feature) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory") -#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ - altinstr2, facility2) ::: "memory") +#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \ + altinstr2, feature2) ::: "memory") /* Alternative inline assembly with input. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -106,8 +176,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); : : input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, altinstr, facility, output, input...) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \ +#define alternative_io(oldinstr, altinstr, feature, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \ : output : input) /* Use this macro if more than one output parameter is needed. */ @@ -116,6 +186,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); /* Use this macro if clobbers are needed without inputs. */ #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber +#else /* __ASSEMBLY__ */ + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature + .long \orig_start - . + .long \alt_start - . + .long \feature + .byte \orig_end - \orig_start + .org . - ( \orig_end - \orig_start ) & 1 + .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) + .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature + .pushsection .altinstr_replacement,"ax" +770: \newinstr +771: .popsection +772: \oldinstr +773: .pushsection .altinstructions,"a" + alt_entry 772b, 773b, 770b, 771b, \feature + .popsection +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 + .pushsection .altinstr_replacement,"ax" +770: \newinstr1 +771: \newinstr2 +772: .popsection +773: \oldinstr +774: .pushsection .altinstructions,"a" + alt_entry 773b, 774b, 770b, 771b,\feature1 + alt_entry 773b, 774b, 771b, 772b,\feature2 + .popsection +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 2b379d1d9046b9b927f3858481ae6003452b0193..742c7919cbcdc193b497b1fe90d7f0eeaeecd0d0 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -188,7 +188,8 @@ static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -#ifdef __GCC_ASM_FLAG_OUTPUTS__ +/* GCC versions before 14.2.0 may die with an ICE in some configurations. */ +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200)) static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index d46cc725f0241eb62d7cdcf866a3ff211df3c460..b7d234838a3667bd7fb15f097e0480fa3dc0c82d 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -20,7 +20,6 @@ #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) extern u64 stfle_fac_list[16]; -extern u64 alt_stfle_fac_list[16]; static inline void __set_facility(unsigned long nr, void *facilities) { diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h index 27db65fbf3f67935d967a5846af42e9806f186fa..f73e181d09aeb1d09314bd1b5bc0cccdee43752c 100644 --- a/arch/s390/include/asm/kmsan.h +++ b/arch/s390/include/asm/kmsan.h @@ -12,8 +12,8 @@ static inline bool is_lowcore_addr(void *addr) { - return addr >= (void *)&S390_lowcore && - addr < (void *)(&S390_lowcore + 1); + return addr >= (void *)get_lowcore() && + addr < (void *)(get_lowcore() + 1); } static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) @@ -25,7 +25,7 @@ static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) * order to get a distinct struct page. */ addr += (void *)lowcore_ptr[raw_smp_processor_id()] - - (void *)&S390_lowcore; + (void *)get_lowcore(); if (KMSAN_WARN_ON(is_lowcore_addr(addr))) return NULL; return kmsan_get_metadata(addr, is_origin); diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index c724e71e17852dadf805eb246bb421e6c7ab04ec..183ac29afaf8214d6b19116799fc1b39ebda95dc 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -14,10 +14,15 @@ #include #include #include +#include #define LC_ORDER 1 #define LC_PAGES 2 +#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL) + +#ifndef __ASSEMBLY__ + struct pgm_tdb { u64 data[32]; }; @@ -97,8 +102,7 @@ struct lowcore { __u64 save_area_async[8]; /* 0x0240 */ __u64 save_area_restart[1]; /* 0x0280 */ - /* CPU flags. */ - __u64 cpu_flags; /* 0x0288 */ + __u64 pcpu; /* 0x0288 */ /* Return psws. */ psw_t return_psw; /* 0x0290 */ @@ -215,7 +219,14 @@ struct lowcore { static __always_inline struct lowcore *get_lowcore(void) { - return NULL; + struct lowcore *lc; + + if (__is_defined(__DECOMPRESSOR)) + return NULL; + asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + return lc; } extern struct lowcore *lowcore_ptr[]; @@ -225,4 +236,19 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "Q" (address) : "memory"); } +#else /* __ASSEMBLY__ */ + +.macro GET_LC reg + ALTERNATIVE "llilh \reg,0", \ + __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ + ALT_LOWCORE +.endm + +.macro STMG_LC start, end, savearea + ALTERNATIVE "stmg \start, \end, \savearea", \ + __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ + ALT_LOWCORE +.endm + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index b9c1f3cae84267d05c93f473414c31ce82157eb8..192835a3e24dcf195e31552e266d37791b537f52 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -5,8 +5,17 @@ #ifndef __ASSEMBLY__ #include +#include extern int nospec_disable; +extern int nobp; + +static inline bool nobp_enabled(void) +{ + if (__is_defined(__DECOMPRESSOR)) + return false; + return nobp && test_facility(82); +} void nospec_init_branches(void); void nospec_auto_detect(void); diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5ec41ec3d761e3fa6b603da90c35280ab34d8a50..06416b3f94f595d26ca6687ebf4cfc1e6a2a7758 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -174,12 +174,10 @@ static inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -#if IS_ENABLED(CONFIG_PGSTE) int arch_make_folio_accessible(struct folio *folio); #define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE int arch_make_page_accessible(struct page *page); #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -#endif struct vm_layout { unsigned long kaslr_offset; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c87cf2b8e81af864f64f9e1657487751c7ff5853..5ecd442535b9ed260a889361a63960277718d69a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,13 +14,11 @@ #include -#define CIF_SIE 0 /* CPU needs SIE exit cleanup */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_SIE BIT(CIF_SIE) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) @@ -42,21 +40,37 @@ #include #include +struct pcpu { + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ + unsigned long flags; /* per CPU flags */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; + +DECLARE_PER_CPU(struct pcpu, pcpu_devices); + typedef long (*sys_call_ptr_t)(struct pt_regs *regs); +static __always_inline struct pcpu *this_pcpu(void) +{ + return (struct pcpu *)(get_lowcore()->pcpu); +} + static __always_inline void set_cpu_flag(int flag) { - get_lowcore()->cpu_flags |= (1UL << flag); + this_pcpu()->flags |= (1UL << flag); } static __always_inline void clear_cpu_flag(int flag) { - get_lowcore()->cpu_flags &= ~(1UL << flag); + this_pcpu()->flags &= ~(1UL << flag); } static __always_inline bool test_cpu_flag(int flag) { - return get_lowcore()->cpu_flags & (1UL << flag); + return this_pcpu()->flags & (1UL << flag); } static __always_inline bool test_and_set_cpu_flag(int flag) @@ -81,9 +95,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - struct lowcore *lc = lowcore_ptr[cpu]; - - return lc->cpu_flags & (1UL << flag); + return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -405,7 +417,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82)); + asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h new file mode 100644 index 0000000000000000000000000000000000000000..17878b1d048cfbed4f518b134f14ab45af5ef226 --- /dev/null +++ b/arch/s390/include/asm/runtime-const.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_RUNTIME_CONST_H +#define _ASM_S390_RUNTIME_CONST_H + +#include + +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + \ + asm_inline( \ + "0: iihf %[__ret],%[c1]\n" \ + " iilf %[__ret],%[c2]\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "=d" (__ret) \ + : [c1] "i" (0x01234567UL), \ + [c2] "i" (0x89abcdefUL)); \ + __ret; \ +}) + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + unsigned int __ret = (val); \ + \ + asm_inline( \ + "0: srl %[__ret],12\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "+d" (__ret)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 32-bit immediate for iihf and iilf in bits in I2 field */ +static inline void __runtime_fixup_32(u32 *p, unsigned int val) +{ + s390_kernel_write(p, &val, sizeof(val)); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __runtime_fixup_32(where + 2, val >> 32); + __runtime_fixup_32(where + 8, val); +} + +/* Immediate value is lower 12 bits of D2 field of srl */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + u32 insn = *(u32 *)where; + + insn &= 0xfffff000; + insn |= (val & 63); + s390_kernel_write(where, &insn, sizeof(insn)); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_S390_RUNTIME_CONST_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c13c79025348c39a9e18269bbb8d746e597fe6f7..cd835f4fb11a1dafcab776bef676542765c799c9 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern void smp_call_online_cpu(void (*func)(void *), void *); extern void smp_call_ipl_cpu(void (*func)(void *), void *); extern void smp_emergency_stop(void); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 3e43c90ff1354612de111ef55cc22e930e0c12e9..77d5e804af93e5a2ed2412f20a8f5a6b72ed65e5 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) typecheck(int, lp->lock); kcsan_release(); asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ " sth %1,%0\n" : "=R" (((unsigned short *) &lp->lock)[1]) : "d" (0) : "cc", "memory"); diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index d02a709717b87a3aa141ce12ee4fabba6457660e..00ac01874a129f560270b7ef9d8d9ecaa852a964 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ unsigned int cpu; /* current CPU */ + unsigned char sie; /* running in SIE context */ }; /* diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9213be0529ee5d48be943e9e29a53244c82ffd61..a81f897a81ceb34055c85c8afb809b69b0805306 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -332,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -void *s390_kernel_write(void *dst, const void *src, size_t size); +void *__s390_kernel_write(void *dst, const void *src, size_t size); + +static inline void *s390_kernel_write(void *dst, const void *src, size_t size) +{ + if (__is_defined(__DECOMPRESSOR)) + return memcpy(dst, src, size); + return __s390_kernel_write(dst, src, size); +} int __noreturn __put_kernel_bad(void); diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0679445cac0be8ea9f76e35588d720192b3c77f0..153d93468b77c0331605bbe418c99d920e049372 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit) return test_bit_inv(feature_bit, &uv_info.uv_feature_indications); } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; static inline int is_prot_virt_guest(void) @@ -442,7 +441,10 @@ static inline int share(unsigned long addr, u16 cmd) if (!uv_call(0, (u64)&uvcb)) return 0; - return -EINVAL; + pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n", + uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare", + uvcb.header.rc, uvcb.header.rrc); + panic("System security cannot be guaranteed unless the system panics now.\n"); } /* @@ -466,13 +468,6 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -#else -#define is_prot_virt_guest() 0 -static inline int uv_set_shared(unsigned long addr) { return 0; } -static inline int uv_remove_shared(unsigned long addr) { return 0; } -#endif - -#if IS_ENABLED(CONFIG_KVM) extern int prot_virt_host; static inline int is_prot_virt_host(void) @@ -489,29 +484,5 @@ int uv_convert_from_secure_pte(pte_t pte); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); -#else -#define is_prot_virt_host() 0 -static inline void setup_uv(void) {} - -static inline int uv_pin_shared(unsigned long paddr) -{ - return 0; -} - -static inline int uv_destroy_folio(struct folio *folio) -{ - return 0; -} - -static inline int uv_destroy_pte(pte_t pte) -{ - return 0; -} - -static inline int uv_convert_from_secure_pte(pte_t pte) -{ - return 0; -} -#endif #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7241fa194709208d0feacd64fc89e91691e70746..e47a4be54ff8e2ab2ca26bc2c73c0554c6027122 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o extra-y += vmlinux.lds @@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o obj-$(CONFIG_TRACEPOINTS) += trace.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index f9efc54ec4b7cdae459cc5a29437e70caf2e8680..09cd24cbe74e4c7e09f567fc31606ba3f13fd920 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -4,6 +4,7 @@ #include unsigned long __bootdata_preserved(__abs_lowcore); +int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 1ac5f707dd70500e568354631e1193b259721828..8d5d0de35de05d8ef02e4a144144ff8ae2fea859 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,68 +1,41 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include + +#include +#include +#include #include #include -#include - -static int __initdata_or_module alt_instr_disabled; - -static int __init disable_alternative_instructions(char *str) -{ - alt_instr_disabled = 1; - return 0; -} - -early_param("noaltinstr", disable_alternative_instructions); -static void __init_or_module __apply_alternatives(struct alt_instr *start, - struct alt_instr *end) +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - struct alt_instr *a; u8 *instr, *replacement; + struct alt_instr *a; + bool replace; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ for (a = start; a < end; a++) { + if (!(a->ctx & ctx)) + continue; + switch (a->type) { + case ALT_TYPE_FACILITY: + replace = test_facility(a->data); + break; + case ALT_TYPE_SPEC: + replace = nobp_enabled(); + break; + case ALT_TYPE_LOWCORE: + replace = have_relocated_lowcore(); + break; + default: + replace = false; + } + if (!replace) + continue; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - - if (!__test_facility(a->facility, alt_stfle_fac_list)) - continue; s390_kernel_write(instr, replacement, a->instrlen); } } - -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) -{ - if (!alt_instr_disabled) - __apply_alternatives(start, end); -} - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -void __init apply_alternative_instructions(void) -{ - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - -static void do_sync_core(void *info) -{ - sync_core(); -} - -void text_poke_sync(void) -{ - on_each_cpu(do_sync_core, NULL, 1); -} - -void text_poke_sync_lock(void) -{ - cpus_read_lock(); - text_poke_sync(); - cpus_read_unlock(); -} diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 26bb45d0e6f13f0177cf523c43ef774dd220dd64..ffa0dd2dbaacba0fcdaebf0e6c36abd5ceda47c1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -28,6 +28,7 @@ int main(void) BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); + OFFSET(__TI_sie, task_struct, thread_info.sie); BLANK(); /* pt_regs offsets */ OFFSET(__PT_PSW, pt_regs, psw); @@ -114,7 +115,7 @@ int main(void) OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); - OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_PCPU, lowcore, pcpu); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); @@ -186,5 +187,7 @@ int main(void) #endif OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + + OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 467ed4dba817b08b4d4e17d57165f886e40f6cda..14d324865e33fc99790597fc28d11c91394eefe7 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); +decompressor_handled_param(relocate_lowcore); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -190,13 +191,6 @@ static noinline __init void setup_lowcore_early(void) get_lowcore()->preempt_count = INIT_PREEMPT_COUNT; } -static noinline __init void setup_facility_list(void) -{ - memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); - if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) - __clear_facility(82, alt_stfle_fac_list); -} - static __init void detect_diag9c(void) { unsigned int cpu_address; @@ -291,7 +285,6 @@ void __init startup_init(void) lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - setup_facility_list(); detect_machine_type(); setup_arch_string(); setup_boot_command_line(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 454b6b92c7f84294e7afaca7beabb9446b5b40d6..749410cfdbc078d5ee5c53de946e6ae29a502959 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,49 +28,54 @@ #include #include #include +#include _LPP_OFFSET = __LC_LPP .macro STBEAR address - ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193) .endm .macro LBEAR address - ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193) .endm - .macro LPSWEY address,lpswe - ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193 + .macro LPSWEY address, lpswe + ALTERNATIVE_2 "b \lpswe;nopr", \ + ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \ + __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ + ALT_LOWCORE .endm - .macro MBEAR reg - ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 + .macro MBEAR reg, lowcore + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\ + ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea + .macro CHECK_STACK savearea, lowcore #ifdef CONFIG_CHECK_STACK tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - lghi %r14,\savearea + la %r14,\savearea(\lowcore) jz stack_overflow #endif .endm - .macro CHECK_VMAP_STACK savearea,oklabel + .macro CHECK_VMAP_STACK savearea, lowcore, oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET - clg %r14,__LC_KERNEL_STACK + clg %r14,__LC_KERNEL_STACK(\lowcore) je \oklabel - clg %r14,__LC_ASYNC_STACK + clg %r14,__LC_ASYNC_STACK(\lowcore) je \oklabel - clg %r14,__LC_MCCK_STACK + clg %r14,__LC_MCCK_STACK(\lowcore) je \oklabel - clg %r14,__LC_NODAT_STACK + clg %r14,__LC_NODAT_STACK(\lowcore) je \oklabel - clg %r14,__LC_RESTART_STACK + clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel - lghi %r14,\savearea + la %r14,\savearea(\lowcore) j stack_overflow #else j \oklabel @@ -100,30 +105,31 @@ _LPP_OFFSET = __LC_LPP .endm .macro BPOFF - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82) .endm .macro BPON - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm .macro BPENTER tif_ptr,tif_mask ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ - "j .+12; nop; nop", 82 + "j .+12; nop; nop", ALT_SPEC(82) .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ - "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm #if IS_ENABLED(CONFIG_KVM) - .macro SIEEXIT sie_control - lg %r9,\sie_control # get control block pointer - ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + .macro SIEEXIT sie_control,lowcore + lg %r9,\sie_control # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lg %r9,__LC_CURRENT(\lowcore) + mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit .endm #endif @@ -163,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm) stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lg %r15,0(%r4,%r3) # start of kernel stack of next agr %r15,%r5 # end of kernel stack of next - stg %r3,__LC_CURRENT # store task struct of next - stg %r15,__LC_KERNEL_STACK # store end of kernel stack + GET_LC %r13 + stg %r3,__LC_CURRENT(%r13) # store task struct of next + stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next aghi %r3,__TASK_pid - mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next + mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next + ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 BR_EX %r14 SYM_FUNC_END(__switch_to_asm) @@ -183,15 +190,16 @@ SYM_FUNC_END(__switch_to_asm) */ SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers - lg %r12,__LC_CURRENT + GET_LC %r13 + lg %r14,__LC_CURRENT(%r13) stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 - mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 - oi __LC_CPU_FLAGS+7,_CIF_SIE + mvi __TI_sie(%r14),1 lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now @@ -210,8 +218,10 @@ SYM_FUNC_START(__sie64a) .Lsie_skip: lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + GET_LC %r14 + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lg %r14,__LC_CURRENT(%r14) + mvi __TI_sie(%r14),0 # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. @@ -254,14 +264,15 @@ EXPORT_SYMBOL(sie_exit) */ SYM_CODE_START(system_call) - stpt __LC_SYS_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_SYNC + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF lghi %r14,0 .Lsysc_per: - STBEAR __LC_LAST_BREAK - lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK + STBEAR __LC_LAST_BREAK(%r13) + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) # clear user controlled register to prevent speculative use @@ -276,17 +287,17 @@ SYM_CODE_START(system_call) xgr %r10,%r10 xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC - MBEAR %r2 + mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13) + MBEAR %r2,%r13 lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(system_call) @@ -297,12 +308,13 @@ SYM_CODE_START(ret_from_fork) lgr %r3,%r11 brasl %r14,__ret_from_fork STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + GET_LC %r13 + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(ret_from_fork) @@ -311,39 +323,40 @@ SYM_CODE_END(ret_from_fork) */ SYM_CODE_START(pgm_check_handler) - stpt __LC_SYS_ENTER_TIMER + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_SYNC lgr %r10,%r15 - lmg %r8,%r9,__LC_PGM_OLD_PSW + lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) j 3f # -> fault in user space .Lpgm_skip_asce: 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault - tm __LC_PGM_ILC+3,0x80 # check for per exception + tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -3: lg %r15,__LC_KERNEL_STACK + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f +3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13) + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13) stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - ltg %r12,__LC_GMAP + ltg %r12,__LC_GMAP(%r13) jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r10) + SIEEXIT __SF_SIE_CONTROL(%r10),%r13 #endif 5: stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use @@ -359,11 +372,11 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -372,11 +385,11 @@ SYM_CODE_START(pgm_check_handler) # single stepped system call # .Lpgm_svcper: - mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13) larl %r14,.Lsysc_per - stg %r14,__LC_RETURN_PSW+8 + stg %r14,__LC_RETURN_PSW+8(%r13) lghi %r14,1 - LBEAR __LC_PGM_LAST_BREAK + LBEAR __LC_PGM_LAST_BREAK(%r13) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per SYM_CODE_END(pgm_check_handler) @@ -385,25 +398,27 @@ SYM_CODE_END(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler SYM_CODE_START(\name) - stckf __LC_INT_CLOCK - stpt __LC_SYS_ENTER_TIMER - STBEAR __LC_LAST_BREAK + STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC + GET_LC %r13 + stckf __LC_INT_CLOCK(%r13) + stpt __LC_SYS_ENTER_TIMER(%r13) + STBEAR __LC_LAST_BREAK(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lmg %r8,%r9,\lc_old_psw + lmg %r8,%r9,\lc_old_psw(%r13) tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA_ASYNC +0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK +1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -417,18 +432,18 @@ SYM_CODE_START(\name) xgr %r7,%r7 xgr %r10,%r10 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - MBEAR %r11 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13) + MBEAR %r11,%r13 stmg %r8,%r9,__PT_PSW(%r11) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,\handler - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) lmg %r0,%r15,__PT_R0(%r11) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -443,35 +458,37 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq */ SYM_CODE_START(mcck_int_handler) BPOFF - lmg %r8,%r9,__LC_MCK_OLD_PSW - TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE + GET_LC %r13 + lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID jno .Lmcck_panic # control registers invalid -> panic ptlb - lghi %r14,__LC_CPU_TIMER_SAVE_AREA - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID + lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13) + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYS_ENTER_TIMER - clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER(%r13) + clc 0(8,%r14),__LC_EXIT_TIMER(%r13) jl 1f - la %r14,__LC_EXIT_TIMER -1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + la %r14,__LC_EXIT_TIMER(%r13) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13) jl 2f - la %r14,__LC_LAST_UPDATE_TIMER + la %r14,__LC_LAST_UPDATE_TIMER(%r13) 2: spt 0(%r14) - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) +3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID jno .Lmcck_panic tmhh %r8,0x0001 # interrupting from user ? jnz .Lmcck_user - TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff jz .Lmcck_user - # Need to compare the address instead of a CIF_SIE* flag. + # Need to compare the address instead of __TI_SIE flag. # Otherwise there would be a race between setting the flag # and entering SIE (or leaving and clearing the flag). This # would cause machine checks targeted at the guest to be @@ -480,18 +497,19 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST + lg %r10,__LC_PCPU + oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif .Lmcck_user: - lg %r15,__LC_MCCK_STACK + lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r14,__LC_GPREGS_SAVE_AREA+64 - stmg %r0,%r7,__PT_R0(%r11) + lay %r14,__LC_GPREGS_SAVE_AREA(%r13) + mvc __PT_R0(128,%r11),0(%r14) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 @@ -501,7 +519,6 @@ SYM_CODE_START(mcck_int_handler) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 - mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -509,12 +526,13 @@ SYM_CODE_START(mcck_int_handler) brasl %r14,s390_do_machine_check lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? jno 0f BPON - stpt __LC_EXIT_TIMER -0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 + stpt __LC_EXIT_TIMER(%r13) +0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\ + ALT_FACILITY(193) LBEAR 0(%r12) lmg %r11,%r15,__PT_R11(%r11) LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE @@ -550,7 +568,7 @@ SYM_CODE_START(mcck_int_handler) SYM_CODE_END(mcck_int_handler) SYM_CODE_START(restart_int_handler) - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f @@ -558,15 +576,17 @@ SYM_CODE_START(restart_int_handler) 0: larl %r15,daton_psw lpswe 0(%r15) # turn dat on, keep irqs off .Ldaton: - lg %r15,__LC_RESTART_STACK + GET_LC %r15 + lg %r15,__LC_RESTART_STACK(%r15) xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART - mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW + GET_LC %r13 + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13) + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) - lg %r1,__LC_RESTART_FN # load fn, parm & source cpu - lg %r2,__LC_RESTART_DATA - lgf %r3,__LC_RESTART_SOURCE + lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA(%r13) + lgf %r3,__LC_RESTART_SOURCE(%r13) ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu @@ -588,7 +608,8 @@ SYM_CODE_END(restart_int_handler) * Setup a pt_regs so that show_trace can provide a good call trace. */ SYM_CODE_START(stack_overflow) - lg %r15,__LC_NODAT_STACK # change to panic stack + GET_LC %r15 + lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index fa90bbdc5ef9467c70dcba01f11e2d2ed273e115..6f2e87920288ac6d377279470a1763805fb0b71a 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -113,7 +113,7 @@ void load_fpu_state(struct fpu *state, int flags) int mask; if (flags & KERNEL_FPC) - fpu_lfpc(&state->fpc); + fpu_lfpc_safe(&state->fpc); if (!cpu_has_vx()) { if (flags & KERNEL_VXR_V0V7) load_fp_regs_vx(state->vxrs); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 45413b04efc525a8eed9aeb9d16476a8c20a5463..396034b2fe678238c37b1052316f837d4c291cbc 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -18,14 +19,15 @@ __HEAD SYM_CODE_START(startup_continue) larl %r1,tod_clock_base - mvc 0(16,%r1),__LC_BOOT_CLOCK + GET_LC %r2 + mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # larl %r14,init_task - stg %r14,__LC_CURRENT + stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET - stg %r15,__LC_KERNEL_STACK + stg %r15,__LC_KERNEL_STACK(%r2) brasl %r14,sclp_early_adjust_va # allow sclp_early_printk brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 3a7d6e1722112ac37813329309cab839cc454580..f17bb7bf939242c4a012cfff1dd0b0e6191354f4 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2112,7 +2112,7 @@ void do_restart(void *arg) tracing_off(); debug_locks_off(); lgr_info_log(); - smp_call_online_cpu(__do_restart, arg); + smp_call_ipl_cpu(__do_restart, arg); } /* on halt */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index f4cf65da6d499f30f3ffe348263ab5b919426679..8f681ccfb83a1d6e21b5b9d707706c4796ca6977 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data) * This need to be done *after* s390_reset_system set the * prefix register of this CPU to zero */ - memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), + memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area), phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); call_nodat(1, int, purgatory, int, 1); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 9b8c24ebb00858561df634ac3a17124d53bc92e9..e11ec15960a18f77d208e3e50bcc31177b91a3af 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -4,6 +4,8 @@ #include #include +int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP); + static int __init nobp_setup_early(char *str) { bool enabled; @@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str) * The user explicitly requested nobp=1, enable it and * disable the expoline support. */ - __set_facility(82, alt_stfle_fac_list); + nobp = 1; if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_disable = 1; } else { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } return 0; } @@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early); static int __init nospec_setup_early(char *str) { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; return 0; } early_param("nospec", nospec_setup_early); @@ -40,7 +42,7 @@ static int __init nospec_report(void) pr_info("Spectre V2 mitigation: etokens\n"); if (nospec_uses_trampoline()) pr_info("Spectre V2 mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) pr_info("Spectre V2 mitigation: limited branch prediction\n"); return 0; } @@ -66,14 +68,14 @@ void __init nospec_auto_detect(void) */ if (__is_defined(CC_USING_EXPOLINE)) nospec_disable = 1; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } else if (__is_defined(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. */ nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } /* * If the kernel has not been compiled with expolines the @@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } if (str && !strncmp(str, "off", 3)) nospec_disable = 1; diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 52d4353188ad8cb3328dabe3f437f2e57fa6baaa..a951888186370ddb7c72c8db34e91269997c9e45 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Mitigation: etokens\n"); if (nospec_uses_trampoline()) return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) return sprintf(buf, "Mitigation: limited branch prediction\n"); return sprintf(buf, "Vulnerable\n"); } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1434642e9cba0d5c35bda8e77cc292d1cf64df20..6968be98af11790ff3f771ec5b2ca8a9a7f9c58c 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) struct cf_trailer_entry *trailer_start, *trailer_stop; struct cf_ctrset_entry *ctrstart, *ctrstop; size_t offset = 0; + int i; - auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; - do { + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + /* Counter set not authorized */ + if (!(auth & cpumf_ctr_ctl[i])) + continue; + /* Counter set size zero was not saved */ + if (!cpum_cf_read_setsize(i)) + continue; + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { pr_err_once("cpum_cf_diag counter set compare error " "in set %i\n", ctrstart->set); return 0; } - auth &= ~cpumf_ctr_ctl[ctrstart->set]; if (ctrstart->def == CF_DIAG_CTRSET_DEF) { cfdiag_diffctrset((u64 *)(ctrstart + 1), (u64 *)(ctrstop + 1), ctrstart->ctr); offset += ctrstart->ctr * sizeof(u64) + sizeof(*ctrstart); } - } while (ctrstart->def && auth); + } /* Save time_stamp from start of event in stop's trailer */ trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 65c1464eea4fcc210ca6ed65c7ce843d2ea28fe9..5ce9a795a0fe1b4f3d2afd03e1093b1642308a3a 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -17,7 +17,8 @@ #include #include #include - +#include +#include #include #include #include @@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) } } +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} + /* * cpu_init - initializes state that is per-CPU. */ diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 88087a32ebc6ef2ec2b1cbd2682056989795531c..69fcaf54d5ca5e34ba311d743ef27e482dd2e25c 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,6 +9,7 @@ #include #include #include +#include GEN_BR_THUNK %r9 @@ -20,20 +21,15 @@ # r3 = Parameter for function # SYM_CODE_START(store_status) - /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_RESTART + STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA /* General purpose registers */ - lghi %r1,__LC_GPREGS_SAVE_AREA - stmg %r0,%r15,0(%r1) - mvc 8(8,%r1),__LC_SAVE_AREA_RESTART + GET_LC %r13 /* Control registers */ - lghi %r1,__LC_CREGS_SAVE_AREA - stctg %c0,%c15,0(%r1) + stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13) /* Access registers */ - lghi %r1,__LC_AREGS_SAVE_AREA - stam %a0,%a15,0(%r1) + stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13) /* Floating point registers */ - lghi %r1,__LC_FPREGS_SAVE_AREA + lay %r1,__LC_FPREGS_SAVE_AREA(%r13) std %f0, 0x00(%r1) std %f1, 0x08(%r1) std %f2, 0x10(%r1) @@ -51,21 +47,21 @@ SYM_CODE_START(store_status) std %f14,0x70(%r1) std %f15,0x78(%r1) /* Floating point control register */ - lghi %r1,__LC_FP_CREG_SAVE_AREA + lay %r1,__LC_FP_CREG_SAVE_AREA(%r13) stfpc 0(%r1) /* CPU timer */ - lghi %r1,__LC_CPU_TIMER_SAVE_AREA + lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13) stpt 0(%r1) /* Store prefix register */ - lghi %r1,__LC_PREFIX_SAVE_AREA + lay %r1,__LC_PREFIX_SAVE_AREA(%r13) stpx 0(%r1) /* Clock comparator - seven bytes */ - lghi %r1,__LC_CLOCK_COMP_SAVE_AREA larl %r4,clkcmp stckc 0(%r4) + lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13) mvc 1(7,%r1),1(%r4) /* Program status word */ - lghi %r1,__LC_PSW_SAVE_AREA + lay %r1,__LC_PSW_SAVE_AREA(%r13) epsw %r4,%r5 st %r4,0(%r1) st %r5,4(%r1) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3993f4caf224ab02d670bad135de8641baee9225..4ec99f73fa27e19b2cc612a92ac3507206bc23ac 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -149,13 +149,12 @@ unsigned long __bootdata_preserved(max_mappable); struct physmem_info __bootdata(physmem_info); struct vm_layout __bootdata_preserved(vm_layout); -EXPORT_SYMBOL_GPL(vm_layout); +EXPORT_SYMBOL(vm_layout); int __bootdata_preserved(__kaslr_enabled); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); -u64 alt_stfle_fac_list[16]; struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; @@ -406,6 +405,7 @@ static void __init setup_lowcore(void) panic("%s: Failed to allocate %zu bytes align=%zx\n", __func__, sizeof(*lc), sizeof(*lc)); + lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0); lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; lc->restart_psw.addr = __pa(restart_int_handler); lc->external_new_psw.mask = PSW_KERNEL_BITS; @@ -889,6 +889,9 @@ void __init setup_arch(char **cmdline_p) else pr_info("Linux is running as a guest in 64-bit mode\n"); + if (have_relocated_lowcore()) + pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); + log_component_list(); /* Have one command line that is parsed and saved in /proc/cmdline */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c3c54adf67bc23a7683376558ad6425cb4cb8cbc..fbba37ec53cf7d507c4890c21a02b93a1ad280d5 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -74,16 +74,15 @@ enum { CPU_STATE_CONFIGURED, }; -struct pcpu { - unsigned long ec_mask; /* bit mask for ec_xxx functions */ - unsigned long ec_clk; /* sigp timestamp for ec_xxx */ - signed char state; /* physical cpu state */ - signed char polarization; /* physical polarization */ - u16 address; /* physical cpu address */ -}; - static u8 boot_core_type; -static struct pcpu pcpu_devices[NR_CPUS]; +DEFINE_PER_CPU(struct pcpu, pcpu_devices); +/* + * Pointer to the pcpu area of the boot CPU. This is required when a restart + * interrupt is triggered on an offline CPU. For that case accessing percpu + * data with the common primitives does not work, since the percpu offset is + * stored in a non existent lowcore. + */ +static struct pcpu *ipl_pcpu; unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -174,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) int cpu; for_each_cpu(cpu, mask) - if (pcpu_devices[cpu].address == address) - return pcpu_devices + cpu; + if (per_cpu(pcpu_devices, cpu).address == address) + return &per_cpu(pcpu_devices, cpu); return NULL; } @@ -230,13 +229,11 @@ out: return -ENOMEM; } -static void pcpu_free_lowcore(struct pcpu *pcpu) +static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; async_stack = lc->async_stack - STACK_INIT_OFFSET; @@ -259,6 +256,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->pcpu = (unsigned long)pcpu; lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; @@ -277,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) arch_spin_lock_setup(cpu); } -static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +static void pcpu_attach_task(int cpu, struct task_struct *tsk) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; lc->current_task = (unsigned long)tsk; @@ -296,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) lc->steal_timer = 0; } -static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; lc->restart_source = -1U; - pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); + pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0); } typedef void (pcpu_delegate_fn)(void *); @@ -320,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, +static void pcpu_delegate(struct pcpu *pcpu, int cpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { struct lowcore *lc, *abs_lc; unsigned int source_cpu; - lc = lowcore_ptr[pcpu - pcpu_devices]; + lc = lowcore_ptr[cpu]; source_cpu = stap(); if (pcpu->address == source_cpu) { @@ -377,26 +371,11 @@ static int pcpu_set_smt(unsigned int mtid) smp_cpu_mt_shift = 0; while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) smp_cpu_mt_shift++; - pcpu_devices[0].address = stap(); + per_cpu(pcpu_devices, 0).address = stap(); } return cc; } -/* - * Call function on an online CPU. - */ -void smp_call_online_cpu(void (*func)(void *), void *data) -{ - struct pcpu *pcpu; - - /* Use the current cpu if it is online. */ - pcpu = pcpu_find_address(cpu_online_mask, stap()); - if (!pcpu) - /* Use the first online cpu. */ - pcpu = pcpu_devices + cpumask_first(cpu_online_mask); - pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); -} - /* * Call function on the ipl CPU. */ @@ -404,11 +383,10 @@ void smp_call_ipl_cpu(void (*func)(void *), void *data) { struct lowcore *lc = lowcore_ptr[0]; - if (pcpu_devices[0].address == stap()) + if (ipl_pcpu->address == stap()) lc = get_lowcore(); - pcpu_delegate(&pcpu_devices[0], func, data, - lc->nodat_stack); + pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack); } int smp_find_processor_id(u16 address) @@ -416,21 +394,21 @@ int smp_find_processor_id(u16 address) int cpu; for_each_present_cpu(cpu) - if (pcpu_devices[cpu].address == address) + if (per_cpu(pcpu_devices, cpu).address == address) return cpu; return -1; } void schedule_mcck_handler(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending); } bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; - if (pcpu_running(pcpu_devices + cpu)) + if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu))) return false; return true; } @@ -442,7 +420,7 @@ void notrace smp_yield_cpu(int cpu) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); + : : "d" (per_cpu(pcpu_devices, cpu).address)); } EXPORT_SYMBOL_GPL(smp_yield_cpu); @@ -463,7 +441,7 @@ void notrace smp_emergency_stop(void) end = get_tod_clock() + (1000000UL << 12); for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && @@ -472,7 +450,7 @@ void notrace smp_emergency_stop(void) } while (get_tod_clock() < end) { for_each_cpu(cpu, &cpumask) - if (pcpu_stopped(pcpu_devices + cpu)) + if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) cpumask_clear_cpu(cpu, &cpumask); if (cpumask_empty(&cpumask)) break; @@ -487,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop); */ void smp_send_stop(void) { + struct pcpu *pcpu; int cpu; /* Disable all interrupts/machine checks */ @@ -502,8 +481,9 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; - pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); - while (!pcpu_stopped(pcpu_devices + cpu)) + pcpu = per_cpu_ptr(&pcpu_devices, cpu); + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } @@ -517,7 +497,7 @@ static void smp_handle_ext_call(void) unsigned long bits; /* handle bit signal external calls */ - bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + bits = this_cpu_xchg(pcpu_devices.ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) @@ -542,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } /* @@ -557,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu) */ void arch_smp_send_reschedule(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_schedule); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule); } #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work); } #endif @@ -575,7 +555,7 @@ int smp_store_status(int cpu) struct pcpu *pcpu; unsigned long pa; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); lc = lowcore_ptr[cpu]; pa = __pa(&lc->floating_pt_save_area); if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, @@ -683,17 +663,17 @@ void __init smp_save_dump_secondary_cpus(void) void smp_cpu_set_polarization(int cpu, int val) { - pcpu_devices[cpu].polarization = val; + per_cpu(pcpu_devices, cpu).polarization = val; } int smp_cpu_get_polarization(int cpu) { - return pcpu_devices[cpu].polarization; + return per_cpu(pcpu_devices, cpu).polarization; } int smp_cpu_get_cpu_address(int cpu) { - return pcpu_devices[cpu].address; + return per_cpu(pcpu_devices, cpu).address; } static void __ref smp_get_core_info(struct sclp_core_info *info, int early) @@ -732,7 +712,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { if (pcpu_find_address(cpu_present_mask, address + i)) continue; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); pcpu->address = address + i; if (configured) pcpu->state = CPU_STATE_CONFIGURED; @@ -767,7 +747,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) * that all SMT threads get subsequent logical CPU numbers. */ if (early) { - core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift; for (i = 0; i < info->configured; i++) { core = &info->core[i]; if (core->core_id == core_id) { @@ -867,7 +847,7 @@ static void smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); int rc; if (pcpu->state != CPU_STATE_CONFIGURED) @@ -885,8 +865,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) */ system_ctlreg_lock(); pcpu_prepare_secondary(pcpu, cpu); - pcpu_attach_task(pcpu, tidle); - pcpu_start_fn(pcpu, smp_start_secondary, NULL); + pcpu_attach_task(cpu, tidle); + pcpu_start_fn(cpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); @@ -931,18 +911,19 @@ void __cpu_die(unsigned int cpu) struct pcpu *pcpu; /* Wait until target cpu is down */ - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); while (!pcpu_stopped(pcpu)) cpu_relax(); - pcpu_free_lowcore(pcpu); + pcpu_free_lowcore(pcpu, cpu); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + pcpu->flags = 0; } void __noreturn cpu_die(void) { idle_task_exit(); - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0); for (;;) ; } @@ -972,11 +953,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { - struct pcpu *pcpu = pcpu_devices; + struct lowcore *lc = get_lowcore(); WARN_ON(!cpu_present(0) || !cpu_online(0)); - pcpu->state = CPU_STATE_CONFIGURED; - get_lowcore()->percpu_offset = __per_cpu_offset[0]; + lc->percpu_offset = __per_cpu_offset[0]; + ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0); + ipl_pcpu->state = CPU_STATE_CONFIGURED; + lc->pcpu = (unsigned long)ipl_pcpu; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } @@ -984,8 +967,8 @@ void __init smp_setup_processor_id(void) { struct lowcore *lc = get_lowcore(); - pcpu_devices[0].address = stap(); lc->cpu_nr = 0; + per_cpu(pcpu_devices, 0).address = stap(); lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; } @@ -1007,7 +990,7 @@ static ssize_t cpu_configure_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -1033,7 +1016,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_online(cpu + i)) goto out; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); rc = 0; switch (val) { case 0: @@ -1045,7 +1028,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_STANDBY; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1060,7 +1043,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_CONFIGURED; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1079,7 +1062,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); + return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -1105,14 +1088,14 @@ static struct attribute_group cpu_online_attr_group = { static int smp_cpu_online(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); } static int smp_cpu_pre_down(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); return 0; @@ -1125,7 +1108,7 @@ bool arch_cpu_is_hotpluggable(int cpu) int arch_register_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); int rc; c->hotpluggable = arch_cpu_is_hotpluggable(cpu); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index fa62fa0e369f2736216d3509f51c35d50c32a1c0..36db065c7cf75fddfac665072a273236f95639b3 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -18,11 +18,22 @@ #include #include +#if !IS_ENABLED(CONFIG_KVM) +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + return 0; +} + +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + return 0; +} +#endif + /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); -#endif /* * uv_info contains both host and guest information but it's currently only @@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest); struct uv_info __bootdata_preserved(uv_info); EXPORT_SYMBOL(uv_info); -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); @@ -543,9 +553,6 @@ int arch_make_page_accessible(struct page *page) return arch_make_folio_accessible(page_folio(page)); } EXPORT_SYMBOL_GPL(arch_make_page_accessible); -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -721,24 +728,13 @@ static struct attribute_group uv_query_attr_group = { static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST - val = prot_virt_guest; -#endif - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_guest); } static ssize_t uv_is_prot_virt_host(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#if IS_ENABLED(CONFIG_KVM) - val = prot_virt_host; -#endif - - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_host); } static struct kobj_attribute uv_prot_virt_guest = @@ -790,4 +786,3 @@ out_kobj: return rc; } device_initcall(uv_info_init); -#endif diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a1ce3925ec719055d14dfbeb8e5eb63416bc2d31..e67cd409b8587117925e2c0d1e4e8ec7eb126ea0 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -59,14 +59,6 @@ SECTIONS } :text = 0x0700 RO_DATA(PAGE_SIZE) - .data.rel.ro : { - *(.data.rel.ro .data.rel.ro.*) - } - .got : { - __got_start = .; - *(.got) - __got_end = .; - } . = ALIGN(PAGE_SIZE); _sdata = .; /* Start of data section */ @@ -80,6 +72,15 @@ SECTIONS . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; + .data.rel.ro : { + *(.data.rel.ro .data.rel.ro.*) + } + .got : { + __got_start = .; + *(.got) + __got_end = .; + } + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) .data.rel : { *(.data.rel*) @@ -190,6 +191,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); @@ -219,6 +223,8 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) + QUAD(__alt_instructions) + QUAD(__alt_instructions_end) #ifdef CONFIG_KASAN QUAD(kasan_early_shadow_page) QUAD(kasan_early_shadow_pte) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bf8534218af3dcdf7be20f58a8e95c904cb2b9c0..e680c6bf0c9d94dea8dd67657472610ed9a06a37 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -267,7 +267,12 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); + u32 gd; + + if (!kvm->arch.gisa_int.origin) + return 0; + + gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 0c9a73a18826ca7ecb7cc13851f3cbe9b69dfe59..9f86ad8fa8b4dca716de58d4fe0313cdc4818f31 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock) int owner; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); return owner; @@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) int expected = old; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 45db5f47b22dd7db154f4c982192d1652cd4549a..0a67fcee44141e966ff514784752c10a6973167d 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,13 +16,15 @@ static unsigned long max_addr; struct addr_marker { + int is_start; unsigned long start_address; const char *name; }; enum address_markers_idx { - IDENTITY_BEFORE_NR = 0, - IDENTITY_BEFORE_END_NR, + KVA_NR = 0, + LOWCORE_START_NR, + LOWCORE_END_NR, AMODE31_START_NR, AMODE31_END_NR, KERNEL_START_NR, @@ -30,12 +33,22 @@ enum address_markers_idx { KFENCE_START_NR, KFENCE_END_NR, #endif - IDENTITY_AFTER_NR, - IDENTITY_AFTER_END_NR, + IDENTITY_START_NR, + IDENTITY_END_NR, VMEMMAP_NR, VMEMMAP_END_NR, VMALLOC_NR, VMALLOC_END_NR, +#ifdef CONFIG_KMSAN + KMSAN_VMALLOC_SHADOW_START_NR, + KMSAN_VMALLOC_SHADOW_END_NR, + KMSAN_VMALLOC_ORIGIN_START_NR, + KMSAN_VMALLOC_ORIGIN_END_NR, + KMSAN_MODULES_SHADOW_START_NR, + KMSAN_MODULES_SHADOW_END_NR, + KMSAN_MODULES_ORIGIN_START_NR, + KMSAN_MODULES_ORIGIN_END_NR, +#endif MODULES_NR, MODULES_END_NR, ABS_LOWCORE_NR, @@ -49,33 +62,44 @@ enum address_markers_idx { }; static struct addr_marker address_markers[] = { - [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, - [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, - [AMODE31_START_NR] = {0, "Amode31 Area Start"}, - [AMODE31_END_NR] = {0, "Amode31 Area End"}, - [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, - [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, + [KVA_NR] = {0, 0, "Kernel Virtual Address Space"}, + [LOWCORE_START_NR] = {1, 0, "Lowcore Start"}, + [LOWCORE_END_NR] = {0, 0, "Lowcore End"}, + [IDENTITY_START_NR] = {1, 0, "Identity Mapping Start"}, + [IDENTITY_END_NR] = {0, 0, "Identity Mapping End"}, + [AMODE31_START_NR] = {1, 0, "Amode31 Area Start"}, + [AMODE31_END_NR] = {0, 0, "Amode31 Area End"}, + [KERNEL_START_NR] = {1, (unsigned long)_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {0, (unsigned long)_end, "Kernel Image End"}, #ifdef CONFIG_KFENCE - [KFENCE_START_NR] = {0, "KFence Pool Start"}, - [KFENCE_END_NR] = {0, "KFence Pool End"}, + [KFENCE_START_NR] = {1, 0, "KFence Pool Start"}, + [KFENCE_END_NR] = {0, 0, "KFence Pool End"}, +#endif + [VMEMMAP_NR] = {1, 0, "vmemmap Area Start"}, + [VMEMMAP_END_NR] = {0, 0, "vmemmap Area End"}, + [VMALLOC_NR] = {1, 0, "vmalloc Area Start"}, + [VMALLOC_END_NR] = {0, 0, "vmalloc Area End"}, +#ifdef CONFIG_KMSAN + [KMSAN_VMALLOC_SHADOW_START_NR] = {1, 0, "Kmsan vmalloc Shadow Start"}, + [KMSAN_VMALLOC_SHADOW_END_NR] = {0, 0, "Kmsan vmalloc Shadow End"}, + [KMSAN_VMALLOC_ORIGIN_START_NR] = {1, 0, "Kmsan vmalloc Origins Start"}, + [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, 0, "Kmsan vmalloc Origins End"}, + [KMSAN_MODULES_SHADOW_START_NR] = {1, 0, "Kmsan Modules Shadow Start"}, + [KMSAN_MODULES_SHADOW_END_NR] = {0, 0, "Kmsan Modules Shadow End"}, + [KMSAN_MODULES_ORIGIN_START_NR] = {1, 0, "Kmsan Modules Origins Start"}, + [KMSAN_MODULES_ORIGIN_END_NR] = {0, 0, "Kmsan Modules Origins End"}, #endif - [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"}, - [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, - [VMEMMAP_NR] = {0, "vmemmap Area Start"}, - [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, - [VMALLOC_NR] = {0, "vmalloc Area Start"}, - [VMALLOC_END_NR] = {0, "vmalloc Area End"}, - [MODULES_NR] = {0, "Modules Area Start"}, - [MODULES_END_NR] = {0, "Modules Area End"}, - [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, - [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, - [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, - [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, + [MODULES_NR] = {1, 0, "Modules Area Start"}, + [MODULES_END_NR] = {0, 0, "Modules Area End"}, + [ABS_LOWCORE_NR] = {1, 0, "Lowcore Area Start"}, + [ABS_LOWCORE_END_NR] = {0, 0, "Lowcore Area End"}, + [MEMCPY_REAL_NR] = {1, 0, "Real Memory Copy Area Start"}, + [MEMCPY_REAL_END_NR] = {0, 0, "Real Memory Copy Area End"}, #ifdef CONFIG_KASAN - [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, - [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, + [KASAN_SHADOW_START_NR] = {1, KASAN_SHADOW_START, "Kasan Shadow Start"}, + [KASAN_SHADOW_END_NR] = {0, KASAN_SHADOW_END, "Kasan Shadow End"}, #endif - { -1, NULL } + {1, -1UL, NULL} }; struct pg_state { @@ -143,6 +167,19 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level) +{ + struct seq_file *m = st->seq; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; +} + static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { int width = sizeof(unsigned long) * 2; @@ -166,9 +203,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, addr = max_addr; if (st->level == -1) { pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { note_prot_wx(st, addr); @@ -182,13 +217,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } pt_dump_seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } } @@ -260,22 +289,25 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); #endif /* CONFIG_PTDUMP_DEBUGFS */ -/* - * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple - * insertion sort to preserve the original order of markers with the same - * start address. - */ -static void sort_address_markers(void) +static int ptdump_cmp(const void *a, const void *b) { - struct addr_marker tmp; - int i, j; + const struct addr_marker *ama = a; + const struct addr_marker *amb = b; - for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { - tmp = address_markers[i]; - for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) - address_markers[j + 1] = address_markers[j]; - address_markers[j + 1] = tmp; - } + if (ama->start_address > amb->start_address) + return 1; + if (ama->start_address < amb->start_address) + return -1; + /* + * If the start addresses of two markers are identical consider the + * marker which defines the start of an area higher than the one which + * defines the end of an area. This keeps pairs of markers sorted. + */ + if (ama->is_start) + return 1; + if (amb->is_start) + return -1; + return 0; } static int pt_dump_init(void) @@ -283,6 +315,8 @@ static int pt_dump_init(void) #ifdef CONFIG_KFENCE unsigned long kfence_start = (unsigned long)__kfence_pool; #endif + unsigned long lowcore = (unsigned long)get_lowcore(); + /* * Figure out the maximum virtual address being accessible with the * kernel ASCE. We need this to keep the page table walker functions @@ -290,7 +324,10 @@ static int pt_dump_init(void) */ max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; + address_markers[LOWCORE_START_NR].start_address = lowcore; + address_markers[LOWCORE_END_NR].start_address = lowcore + sizeof(struct lowcore); + address_markers[IDENTITY_START_NR].start_address = __identity_base; + address_markers[IDENTITY_END_NR].start_address = __identity_base + ident_map_size; address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; @@ -307,7 +344,18 @@ static int pt_dump_init(void) address_markers[KFENCE_START_NR].start_address = kfence_start; address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE; #endif - sort_address_markers(); +#ifdef CONFIG_KMSAN + address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START; + address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END; + address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START; + address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END; + address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START; + address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END; + address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START; + address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END; +#endif + sort(address_markers, ARRAY_SIZE(address_markers) - 1, + sizeof(address_markers[0]), ptdump_cmp, NULL); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); #endif /* CONFIG_PTDUMP_DEBUGFS */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ddcd39ef43463ade94bc3c94432a0afe6d6840d0..e3d258f9e72610a9d28bc77bdb580daddfcd7db6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -108,6 +108,8 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; + if (MACHINE_HAS_NX) + system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } @@ -170,13 +172,6 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. */ } -void free_initmem(void) -{ - set_memory_rwnx((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - free_initmem_default(POISON_FREE_INITMEM); -} - unsigned long memory_block_size_bytes(void) { /* diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 632c3a55feedf140b80c47302dff2399bf310ec3..28a18c42ba99c3063f7fdcdc8368cf45ca425735 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz } /* - * s390_kernel_write - write to kernel memory bypassing DAT + * __s390_kernel_write - write to kernel memory bypassing DAT * @dst: destination address * @src: source address * @size: number of bytes to copy @@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -notrace void *s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *__s390_kernel_write(void *dst, const void *src, size_t size) { void *tmp = dst; unsigned long flags; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 41c714e212927b12dad4aceba5c73b49169531e3..665b8228afebcdaa8b25cc3caf19d20b95a13fc5 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -661,7 +661,6 @@ void __init vmem_map_init(void) { __set_memory_rox(_stext, _etext); __set_memory_ro(_etext, __end_rodata); - __set_memory_rox(_sinittext, _einittext); __set_memory_rox(__stext_amode31, __etext_amode31); /* * If the BEAR-enhancement facility is not installed the first @@ -670,16 +669,8 @@ void __init vmem_map_init(void) */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - if (debug_pagealloc_enabled()) { - /* - * Use RELOC_HIDE() as long as __va(0) translates to NULL, - * since performing pointer arithmetic on a NULL pointer - * has undefined behavior and generates compiler warnings. - */ - __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); - } - if (MACHINE_HAS_NX) - system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); + if (debug_pagealloc_enabled()) + __set_memory_4k(__va(0), __va(0) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 0ef83b6ac0db7466d4db64ec8a1555b3e66ca499..84482a92133220f446e30ae61b6af196aa17b4b4 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, } } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, + unsigned long *bit) { - struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs, cpu; - unsigned long bit; - struct msi_desc *msi; - struct msi_msg msg; - int cpu_addr; - int rc, irq; - - zdev->aisb = -1UL; - zdev->msi_first_bit = -1U; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); - if (irq_delivery == DIRECTED) { /* Allocate cpu vector bits */ - bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); - if (bit == -1UL) + *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); + if (*bit == -1UL) return -EIO; } else { /* Allocate adapter summary indicator bit */ - bit = airq_iv_alloc_bit(zpci_sbv); - if (bit == -1UL) + *bit = airq_iv_alloc_bit(zpci_sbv); + if (*bit == -1UL) return -EIO; - zdev->aisb = bit; + zdev->aisb = *bit; /* Create adapter interrupt vector */ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL); @@ -302,27 +289,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -ENOMEM; /* Wire up shortcut pointer */ - zpci_ibv[bit] = zdev->aibv; + zpci_ibv[*bit] = zdev->aibv; /* Each function has its own interrupt vector */ - bit = 0; + *bit = 0; } + return 0; +} - /* Request MSI interrupts */ +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu; + struct zpci_dev *zdev = to_zpci(pdev); + struct msi_desc *msi; + struct msi_msg msg; + unsigned long bit; + int cpu_addr; + int rc, irq; + + zdev->aisb = -1UL; + zdev->msi_first_bit = -1U; + + msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); + if (msi_vecs < nvec) { + pr_info("%s requested %d irqs, allocate system limit of %d", + pci_name(pdev), nvec, zdev->max_msi); + } + + rc = __alloc_airq(zdev, msi_vecs, &bit); + if (rc < 0) + return rc; + + /* + * Request MSI interrupts: + * When using MSI, nvec_used interrupt sources and their irq + * descriptors are controlled through one msi descriptor. + * Thus the outer loop over msi descriptors shall run only once, + * while two inner loops iterate over the interrupt vectors. + * When using MSI-X, each interrupt vector/irq descriptor + * is bound to exactly one msi descriptor (nvec_used is one). + * So the inner loops are executed once, while the outer iterates + * over the MSI-X descriptors. + */ hwirq = bit; msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) { - rc = -EIO; if (hwirq - bit >= msi_vecs) break; - irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, - (irq_delivery == DIRECTED) ? - msi->affinity : NULL); + irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used); + irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE, + (irq_delivery == DIRECTED) ? + msi->affinity : NULL); if (irq < 0) return -ENOMEM; - rc = irq_set_msi_desc(irq, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq, &zpci_irq_chip, - handle_percpu_irq); + + for (i = 0; i < irqs_per_msi; i++) { + rc = irq_set_msi_desc_off(irq, i, msi); + if (rc) + return rc; + irq_set_chip_and_handler(irq + i, &zpci_irq_chip, + handle_percpu_irq); + } + msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { if (msi->affinity) @@ -335,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) msg.address_lo |= (cpu_addr << 8); for_each_possible_cpu(cpu) { - airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zpci_ibv[cpu], + hwirq + i, irq + i); } } else { msg.address_lo = zdev->msi_addr & 0xffffffff; - airq_iv_set_data(zdev->aibv, hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zdev->aibv, hwirq + i, irq + i); } msg.address_hi = zdev->msi_addr >> 32; pci_write_msi_msg(irq, &msg); - hwirq++; + hwirq += irqs_per_msi; } zdev->msi_first_bit = bit; - zdev->msi_nr_irqs = msi_vecs; + zdev->msi_nr_irqs = hwirq - bit; rc = zpci_set_irq(zdev); if (rc) return rc; - return (msi_vecs == nvec) ? 0 : msi_vecs; + return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs; } void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); struct msi_desc *msi; + unsigned int i; int rc; /* Disable interrupts */ @@ -369,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) { - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); + for (i = 0; i < msi->nvec_used; i++) { + irq_set_msi_desc(msi->irq + i, NULL); + irq_free_desc(msi->irq + i); + } msi->msg.address_lo = 0; msi->msg.address_hi = 0; msi->msg.data = 0; diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index e24298a734befd9023f534080bb23680b1f99c7d..a04cd13c6315a2af5f28ad83724d63004e322335 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -71,7 +71,9 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) return NULL; } +#ifndef MIN #define MIN(a,b) ((a)<(b) ? (a):(b)) +#endif #define STRINGX(x) #x #define STRING(x) STRINGX(x) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 082d61d85dfc1022de4266bfbeca880fe20c2e1e..de1df0cb45dab2fe8716ee9c4303f032bba5dd5c 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -163,7 +163,7 @@ struct sev_config { */ use_cas : 1, - __reserved : 62; + __reserved : 61; }; static struct sev_config sev_cfg __read_mostly; diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 83073fa3c9891ebfc8d769a8e7a1ee13706a2496..7093ee21c0d1c0410dbf17a77cb70a6bc6736f04 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -344,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal @@ -385,7 +386,6 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 12f2a0c14d33b436457b8edff459148797fc3f95..be01823b1bb4537932115cd0c2cfd51761e6eba9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1520,20 +1520,23 @@ static void x86_pmu_start(struct perf_event *event, int flags) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + unsigned long *cntr_mask, *fixed_cntr_mask; + struct event_constraint *pebs_constraints; + struct cpu_hw_events *cpuc; u64 pebs, debugctl; - int cpu = smp_processor_id(); - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask); - unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); - struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); - unsigned long flags; - int idx; + int cpu, idx; + + guard(irqsave)(); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + cntr_mask = hybrid(cpuc->pmu, cntr_mask); + fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); + pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); if (!*(u64 *)cntr_mask) return; - local_irq_save(flags); - if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); @@ -1577,7 +1580,6 @@ void perf_event_print_debug(void) pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); } - local_irq_restore(flags); } void x86_pmu_stop(struct perf_event *event, int flags) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index be58cfb012dd16c09f9ed7340429d6b6793dc47f..9f116dfc47284567d246ab805a129fc84e9b1adb 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -64,7 +64,7 @@ * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR,MTL,ARL,LNL + * RPL,SPR,MTL,ARL,LNL,SRF * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 @@ -693,7 +693,8 @@ static const struct cstate_model srf_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), - .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), .module_events = BIT(PERF_CSTATE_MODULE_C6_RES), }; diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index 6faaf27e8899895854ef126a186d8b39120a9a34..6cbd9ae58b21b3a9b2b50769f2399ded2c2816d4 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,6 +2,10 @@ #ifndef _ASM_X86_CMDLINE_H #define _ASM_X86_CMDLINE_H +#include + +extern char builtin_cmdline[COMMAND_LINE_SIZE]; + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); int cmdline_find_option(const char *cmdline_ptr, const char *option, char *buffer, int bufsize); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 950a03e0181ec3b4daa2c050d0c18db4260134b4..4a68cb3eba78f81d4187cbdc3f5f6b529b001140 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1305,6 +1305,7 @@ struct kvm_arch { u8 vm_type; bool has_private_mem; bool has_protected_state; + bool pre_fault_allowed; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -2191,6 +2192,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, #define kvm_arch_has_private_mem(kvm) false #endif +#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) + static inline u16 kvm_read_ldt(void) { u16 ldt; diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index a053c12939751f5341011bdd157091795b52bf24..68da67df304d5dc6629d1de4723f0abfc93b5d84 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -66,13 +66,15 @@ static inline bool vcpu_is_preempted(long cpu) #ifdef CONFIG_PARAVIRT /* - * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. * - * Native (and PV wanting native due to vCPU pinning) should disable this key. - * It is done in this backwards fashion to only have a single direction change, - * which removes ordering between native_pv_spin_init() and HV setup. + * Native (and PV wanting native due to vCPU pinning) should keep this key + * disabled. Native does not touch the key. + * + * When in a guest then native_pv_lock_init() enables the key first and + * KVM/XEN might conditionally disable it later in the boot process again. */ -DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); /* * Shortcut for the queued_spin_lock_slowpath() function that allows diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 64fbd2dbc5b7615bf35ff8dfe9cacec7df1ca485..a9088250770f2a3d6320040f815f1b9e6cba653b 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,11 +62,6 @@ void xen_arch_unregister_cpu(int num); #ifdef CONFIG_PVH void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); -#ifdef CONFIG_XEN_PVH -void __init xen_reserve_extra_memory(struct boot_params *bootp); -#else -static inline void xen_reserve_extra_memory(struct boot_params *bootp) { } -#endif #endif /* Lazy mode for batching updates / context switch */ diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index 6cfe762be28bbc72c01cf88c13a7178369d6ada1..d5ef6215583bcf734627058bf9b46dd2b936508f 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -19,7 +19,7 @@ static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; /* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; static u64 acpi_mp_pgd __ro_after_init; static u64 acpi_mp_reset_vector_paddr __ro_after_init; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index be5889bded498401c6ee6b24ccd9b4567ff90cfc..1e0fe5f8ab84e478294b62267176ae01fa540f31 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -462,7 +462,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x00 ... 0x2f: case 0x40 ... 0x4f: - case 0x70 ... 0x7f: + case 0x60 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; default: diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index b3fa61d45352e7120d7be7fb5cdb9d6f071c9729..0b69bfbf345d0ddf0e88c1ad6125d5f22c8883a4 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -306,7 +306,7 @@ static void freq_invariance_enable(void) WARN_ON_ONCE(1); return; } - static_branch_enable(&arch_scale_freq_key); + static_branch_enable_cpuslocked(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } @@ -323,8 +323,10 @@ static void __init bp_init_freq_invariance(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; - if (intel_set_max_freq_ratio()) + if (intel_set_max_freq_ratio()) { + guard(cpus_read_lock)(); freq_invariance_enable(); + } } static void disable_freq_invariance_workfn(struct work_struct *work) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 767bf1c71aadda1a63e9d8c43244b219294ac778..2a2fc14955cd3b2b0302486d82b542a280bd36c6 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -609,7 +609,7 @@ void mtrr_save_state(void) { int first_cpu; - if (!mtrr_enabled()) + if (!mtrr_enabled() || !mtrr_state.have_fixed) return; first_cpu = cpumask_first(cpu_online_mask); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5358d43886adc5409196f3e68308658b05bf9872..fec38153355581215eb93b6301ae90b6f0bd06c5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -51,13 +51,12 @@ DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif -DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); void __init native_pv_lock_init(void) { - if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && - !boot_cpu_has(X86_FEATURE_HYPERVISOR)) - static_branch_disable(&virt_spin_lock_key); + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_enable(&virt_spin_lock_key); } static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d34cad9b7b180fed4bd74f947039bc880f1fb53..6129dc2ba784cbdafb445e43479ca708f252f52a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -164,7 +164,7 @@ unsigned long saved_video_mode; static char __initdata command_line[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +char builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; bool builtin_cmdline_added __ro_after_init; #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4287a8071a3ab60dfda8c26bf06ca7c92bed4831..472a1537b7a977d0ddfa695e0e941fb417b88f94 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -141,8 +141,8 @@ config KVM_AMD_SEV depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM select KVM_GENERIC_PRIVATE_MEM - select HAVE_KVM_GMEM_PREPARE - select HAVE_KVM_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_PREPARE + select HAVE_KVM_ARCH_GMEM_INVALIDATE help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 923e64903da9afeeff80f76062c45bd5ab076717..913bfc96959cbfe7b6049b7323dfd1f2127f180b 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -286,7 +286,6 @@ static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return HV_STATUS_ACCESS_DENIED; } static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {} -static inline void kvm_hv_free_pa_page(struct kvm *kvm) {} static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector) { return false; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a7172ba59ad2e7e95ae3c63f337310e8aa6cb083..5bb481aefcbcdc4c393ea0de917dc4ffc576002f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new, * reversing the LDR calculation to get cluster of APICs, i.e. no * additional work is required. */ - if (apic_x2apic_mode(apic)) { - WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic))); + if (apic_x2apic_mode(apic)) return; - } if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))) { @@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) s64 min_period = min_timer_period_us * 1000LL; if (apic->lapic_timer.period < min_period) { - pr_info_ratelimited( + pr_info_once( "vcpu %i: requested %lld ns " "lapic timer period limited to %lld ns\n", apic->vcpu->vcpu_id, @@ -2966,18 +2964,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) { if (apic_x2apic_mode(vcpu->arch.apic)) { + u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); u32 *id = (u32 *)(s->regs + APIC_ID); u32 *ldr = (u32 *)(s->regs + APIC_LDR); u64 icr; if (vcpu->kvm->arch.x2apic_format) { - if (*id != vcpu->vcpu_id) + if (*id != x2apic_id) return -EINVAL; } else { + /* + * Ignore the userspace value when setting APIC state. + * KVM's model is that the x2APIC ID is readonly, e.g. + * KVM only supports delivering interrupts to KVM's + * version of the x2APIC ID. However, for backwards + * compatibility, don't reject attempts to set a + * mismatched ID for userspace that hasn't opted into + * x2apic_format. + */ if (set) - *id >>= 24; + *id = x2apic_id; else - *id <<= 24; + *id = x2apic_id << 24; } /* @@ -2986,7 +2994,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, * split to ICR+ICR2 in userspace for backwards compatibility. */ if (set) { - *ldr = kvm_apic_calc_x2apic_ldr(*id); + *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 901be9e420a4c5de480a80ac181db03a6f1de045..928cf84778b0c5470b64c79132f4ecc9906891e0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, if (req_max_level) max_level = min(max_level, req_max_level); - return req_max_level; + return max_level; } static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, @@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, u64 end; int r; + if (!vcpu->kvm->arch.pre_fault_allowed) + return -EOPNOTSUPP; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. @@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); if (level == PG_LEVEL_2M) - return kvm_range_has_memory_attributes(kvm, start, end, attrs); + return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { if (hugepage_test_mixed(slot, gfn, level - 1) || diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a16c873b32321802372d87dadd4c062e5bd55171..714c517dd4b72bda0879885c7d1a681f897e7ac2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2276,30 +2276,24 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) { struct sev_data_snp_launch_update fw_args = {0}; - bool assigned; + bool assigned = false; int level; - if (!kvm_mem_is_private(kvm, gfn)) { - pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n", - __func__, gfn); - ret = -EINVAL; - goto err; - } - ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); if (ret || assigned) { pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", __func__, gfn, ret, assigned); - ret = -EINVAL; + ret = ret ? -EINVAL : -EEXIST; goto err; } if (src) { void *vaddr = kmap_local_pfn(pfn + i); - ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE); - if (ret) + if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) { + ret = -EFAULT; goto err; + } kunmap_local(vaddr); } @@ -2549,6 +2543,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) data->gctx_paddr = __psp_pa(sev->snp_context); ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); + /* + * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages + * can be given to the guest simply by marking the RMP entry as private. + * This can happen on first access and also with KVM_PRE_FAULT_MEMORY. + */ + if (!ret) + kvm->arch.pre_fault_allowed = true; + kfree(id_auth); e_free_id_block: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c115d26844f7f6b737bd0423645da08875f7bb50..d6f252555ab3ff0c68ead0ab03013ec8782364ab 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm) to_kvm_sev_info(kvm)->need_init = true; kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); + kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; } if (!pause_filter_count || !pause_filter_thresh) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index af6c8cf6a37a3584d1645965f44487b87869cab2..70219e4069874aae9d3c8f70890e3e2deb63a67c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -427,8 +427,7 @@ static void kvm_user_return_msr_cpu_online(void) int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); int err; value = (value & mask) | (msrs->values[slot].host & ~mask); @@ -450,8 +449,7 @@ EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); static void drop_user_return_notifiers(void) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); if (msrs->registered) kvm_on_user_return(&msrs->urn); @@ -12646,6 +12644,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vm_type = type; kvm->arch.has_private_mem = (type == KVM_X86_SW_PROTECTED_VM); + /* Decided by the vendor code for other VM types. */ + kvm->arch.pre_fault_allowed = + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; ret = kvm_page_track_init(kvm); if (ret) @@ -13641,19 +13642,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm) -{ - return kvm->arch.vm_type == KVM_X86_SNP_VM; -} - +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) { return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order); } #endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) { kvm_x86_call(gmem_invalidate)(start, end); diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 384da1fdd5c6f762135a0676de87874405576917..c65cd55504549acd29ebe1ed20a682c9484c5259 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -207,18 +207,29 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, int cmdline_find_option_bool(const char *cmdline, const char *option) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; - return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + ret = __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option_bool(builtin_cmdline, COMMAND_LINE_SIZE, option); + + return ret; } int cmdline_find_option(const char *cmdline, const char *option, char *buffer, int bufsize) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; + + ret = __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option(builtin_cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); - return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, - buffer, bufsize); + return ret; } diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a314622aa09346306db47689b4cb21e8d0313385..d066aecf8aeb22d4f6b01e8fe18a7101723e6491 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -88,12 +88,14 @@ SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) SYM_FUNC_START(__get_user_8) +#ifndef CONFIG_X86_64 + xor %ecx,%ecx +#endif check_range size=8 ASM_STAC #ifdef CONFIG_X86_64 UACCESS movq (%_ASM_AX),%rdx #else - xor %ecx,%ecx UACCESS movl (%_ASM_AX),%edx UACCESS movl 4(%_ASM_AX),%ecx #endif diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 93e54ba91fbfdbfaf09ffa42a2d8c855187576d7..f5931499c2d6b8170f2555d3cdaf663caefb2d5c 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -110,7 +110,7 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) #define MAX_UNSHARED_PTRS_PER_PGD \ - max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) + MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 2e69abf4f852abb7d8e29c7673dc82f685cf7b89..851ec8f1363a8b389ea4579cc68bf3300a4df27c 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!pmd) return NULL; - /* We can't do anything sensible if we hit a large mapping. */ + /* Large PMD mapping found */ if (pmd_leaf(*pmd)) { - WARN_ON(1); - return NULL; + /* Clear the PMD if we hit a large mapping from the first round */ + if (late_text) { + set_pmd(pmd, __pmd(0)); + } else { + WARN_ON_ONCE(1); + return NULL; + } } if (pmd_none(*pmd)) { @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void) if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) return; - target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false); if (WARN_ON(!target_pte)) return; @@ -301,7 +306,7 @@ enum pti_clone_level { static void pti_clone_pgtable(unsigned long start, unsigned long end, - enum pti_clone_level level) + enum pti_clone_level level, bool late_text) { unsigned long addr; @@ -374,14 +379,14 @@ pti_clone_pgtable(unsigned long start, unsigned long end, */ *target_pmd = *pmd; - addr += PMD_SIZE; + addr = round_up(addr + 1, PMD_SIZE); } else if (level == PTI_CLONE_PTE) { /* Walk the page-table down to the pte level */ pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); continue; } @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, return; /* Allocate PTE in the user page-table */ - target_pte = pti_user_pagetable_walk_pte(addr); + target_pte = pti_user_pagetable_walk_pte(addr, late_text); if (WARN_ON(!target_pte)) return; @@ -401,7 +406,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, /* Clone the PTE */ *target_pte = *pte; - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); } else { BUG(); @@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void) phys_addr_t pa = per_cpu_ptr_to_phys((void *)va); pte_t *target_pte; - target_pte = pti_user_pagetable_walk_pte(va); + target_pte = pti_user_pagetable_walk_pte(va, false); if (WARN_ON(!target_pte)) return; @@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void) start = CPU_ENTRY_AREA_BASE; end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); - pti_clone_pgtable(start, end, PTI_CLONE_PMD); + pti_clone_pgtable(start, end, PTI_CLONE_PMD, false); } #endif /* CONFIG_X86_64 */ @@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void) /* * Clone the populated PMDs of the entry text and force it RO. */ -static void pti_clone_entry_text(void) +static void pti_clone_entry_text(bool late) { pti_clone_pgtable((unsigned long) __entry_text_start, (unsigned long) __entry_text_end, - PTI_CLONE_PMD); + PTI_LEVEL_KERNEL_IMAGE, late); } /* @@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void) * pti_set_kernel_image_nonglobal() did to clear the * global bit. */ - pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE); + pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false); /* * pti_clone_pgtable() will set the global bit in any PMDs @@ -638,8 +643,15 @@ void __init pti_init(void) /* Undo all global bits from the init pagetables in head_64.S: */ pti_set_kernel_image_nonglobal(); + /* Replace some of the global bits just for shared entry text: */ - pti_clone_entry_text(); + /* + * This is very early in boot. Device and Late initcalls can do + * modprobe before free_initmem() and mark_readonly(). This + * pti_clone_entry_text() allows those user-mode-helpers to function, + * but notably the text is still RW. + */ + pti_clone_entry_text(false); pti_setup_espfix64(); pti_setup_vsyscall(); } @@ -656,10 +668,11 @@ void pti_finalize(void) if (!boot_cpu_has(X86_FEATURE_PTI)) return; /* - * We need to clone everything (again) that maps parts of the - * kernel image. + * This is after free_initmem() (all initcalls are done) and we've done + * mark_readonly(). Text is now NX which might've split some PMDs + * relative to the early clone. */ - pti_clone_entry_text(); + pti_clone_entry_text(true); pti_clone_kernel_text(); debug_checkwx_user(); diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 8c2d4b8de25d482694101264674f5886758b00ed..944e0290f2c001810a16a2eee3c2e87a31ba48e0 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -75,9 +75,6 @@ static void __init init_pvh_bootparams(bool xen_guest) } else xen_raw_printk("Warning: Can fit ISA range into e820\n"); - if (xen_guest) - xen_reserve_extra_memory(&pvh_bootparams); - pvh_bootparams.hdr.cmd_line_ptr = pvh_start_info.cmdline_paddr; diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 27a2a02ef8fb1402657754ad5f1ae8c91c6a9d0e..728a4366ca85566829b9feb77525628ed8e808bb 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -27,54 +28,6 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); -void __init xen_pvh_init(struct boot_params *boot_params) -{ - u32 msr; - u64 pfn; - - xen_pvh = 1; - xen_domain_type = XEN_HVM_DOMAIN; - xen_start_flags = pvh_start_info.flags; - - msr = cpuid_ebx(xen_cpuid_base() + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - - if (xen_initial_domain()) - x86_init.oem.arch_setup = xen_add_preferred_consoles; - x86_init.oem.banner = xen_banner; - - xen_efi_init(boot_params); - - if (xen_initial_domain()) { - struct xen_platform_op op = { - .cmd = XENPF_get_dom0_console, - }; - int ret = HYPERVISOR_platform_op(&op); - - if (ret > 0) - xen_init_vga(&op.u.dom0_console, - min(ret * sizeof(char), - sizeof(op.u.dom0_console)), - &boot_params->screen_info); - } -} - -void __init mem_map_via_hcall(struct boot_params *boot_params_p) -{ - struct xen_memory_map memmap; - int rc; - - memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table); - set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table); - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); - if (rc) { - xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); - BUG(); - } - boot_params_p->e820_entries = memmap.nr_entries; -} - /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * @@ -89,8 +42,9 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p) * hypervisor should notify us which memory ranges are suitable for creating * foreign mappings, but that's not yet implemented. */ -void __init xen_reserve_extra_memory(struct boot_params *bootp) +static void __init pvh_reserve_extra_memory(void) { + struct boot_params *bootp = &boot_params; unsigned int i, ram_pages = 0, extra_pages; for (i = 0; i < bootp->e820_entries; i++) { @@ -141,3 +95,58 @@ void __init xen_reserve_extra_memory(struct boot_params *bootp) xen_add_extra_mem(PFN_UP(e->addr), pages); } } + +static void __init pvh_arch_setup(void) +{ + pvh_reserve_extra_memory(); + + if (xen_initial_domain()) + xen_add_preferred_consoles(); +} + +void __init xen_pvh_init(struct boot_params *boot_params) +{ + u32 msr; + u64 pfn; + + xen_pvh = 1; + xen_domain_type = XEN_HVM_DOMAIN; + xen_start_flags = pvh_start_info.flags; + + msr = cpuid_ebx(xen_cpuid_base() + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + x86_init.oem.arch_setup = pvh_arch_setup; + x86_init.oem.banner = xen_banner; + + xen_efi_init(boot_params); + + if (xen_initial_domain()) { + struct xen_platform_op op = { + .cmd = XENPF_get_dom0_console, + }; + int ret = HYPERVISOR_platform_op(&op); + + if (ret > 0) + xen_init_vga(&op.u.dom0_console, + min(ret * sizeof(char), + sizeof(op.u.dom0_console)), + &boot_params->screen_info); + } +} + +void __init mem_map_via_hcall(struct boot_params *boot_params_p) +{ + struct xen_memory_map memmap; + int rc; + + memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table); + set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table); + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc) { + xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); + BUG(); + } + boot_params_p->e820_entries = memmap.nr_entries; +} diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index d4cefd8a9af41b48b47bfb5fb0102a40c0640a28..10c660fae8b30031795cfcbead0ccaf1d3883936 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,8 +54,9 @@ struct mc_debug_data { static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); static struct mc_debug_data mc_debug_data_early __initdata; -static struct mc_debug_data __percpu *mc_debug_data __refdata = +static DEFINE_PER_CPU(struct mc_debug_data *, mc_debug_data) = &mc_debug_data_early; +static struct mc_debug_data __percpu *mc_debug_data_ptr; DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); static struct static_key mc_debug __ro_after_init; @@ -70,16 +71,20 @@ static int __init xen_parse_mc_debug(char *arg) } early_param("xen_mc_debug", xen_parse_mc_debug); +void mc_percpu_init(unsigned int cpu) +{ + per_cpu(mc_debug_data, cpu) = per_cpu_ptr(mc_debug_data_ptr, cpu); +} + static int __init mc_debug_enable(void) { - struct mc_debug_data __percpu *mcdb; unsigned long flags; if (!mc_debug_enabled) return 0; - mcdb = alloc_percpu(struct mc_debug_data); - if (!mcdb) { + mc_debug_data_ptr = alloc_percpu(struct mc_debug_data); + if (!mc_debug_data_ptr) { pr_err("xen_mc_debug inactive\n"); static_key_slow_dec(&mc_debug); return -ENOMEM; @@ -88,7 +93,7 @@ static int __init mc_debug_enable(void) /* Be careful when switching to percpu debug data. */ local_irq_save(flags); xen_mc_flush(); - mc_debug_data = mcdb; + mc_percpu_init(0); local_irq_restore(flags); pr_info("xen_mc_debug active\n"); @@ -150,7 +155,7 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); if (static_key_false(&mc_debug)) { - mcdb = this_cpu_ptr(mc_debug_data); + mcdb = __this_cpu_read(mc_debug_data); memcpy(mcdb->entries, b->entries, b->mcidx * sizeof(struct multicall_entry)); } @@ -230,7 +235,7 @@ struct multicall_space __xen_mc_entry(size_t args) ret.mc = &b->entries[b->mcidx]; if (static_key_false(&mc_debug)) { - struct mc_debug_data *mcdb = this_cpu_ptr(mc_debug_data); + struct mc_debug_data *mcdb = __this_cpu_read(mc_debug_data); mcdb->caller[b->mcidx] = __builtin_return_address(0); mcdb->argsz[b->mcidx] = args; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a0c3e77e3d5b7fd9673d6db284b4ff744255a870..806ddb2391d9bd942dfe56d19a1b995e4a5d9168 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -690,6 +690,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long maxmem_pages; int i; int op; @@ -761,8 +762,8 @@ char * __init xen_memory_setup(void) * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. */ - extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), - extra_pages, max_pages - max_pfn); + maxmem_pages = EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)); + extra_pages = min3(maxmem_pages, extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7ea57f728b89db97a38c618d563de77841ea3e2e..6863d3da7decfc9f6c78cac30aebbce9da46f6f9 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -305,6 +305,7 @@ static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) return rc; xen_pmu_init(cpu); + mc_percpu_init(cpu); /* * Why is this a BUG? If the hypercall fails then everything can be diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index e7775dff94525e617030a5688557ac827f217aaa..0cf16fc79e0bf001d7c82e20134aaa6c6ef66569 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -257,6 +257,9 @@ void xen_mc_callback(void (*fn)(void *), void *data); */ struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); +/* Do percpu data initialization for multicalls. */ +void mc_percpu_init(unsigned int cpu); + extern bool is_xen_pmu; irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cc57e2dd9a0bb3cf0f6443a29800f6a76120bc5e..2cafcf11ee8bee00910c539db74234315272aa7b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; + unsigned long flags; struct blk_mq_tags *tags = hctx->tags; /* @@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) return; } - spin_lock_irq(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); users = tags->active_queues + 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); - spin_unlock_irq(&tags->lock); + spin_unlock_irqrestore(&tags->lock, flags); } /* diff --git a/block/blk-throttle.c b/block/blk-throttle.c index dc6140fa3de06173d141c5de40825917f0dee16a..6943ec720f39ff9c82eb0ce7b76ce99d37df756e 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -31,14 +31,6 @@ static struct workqueue_struct *kthrotld_workqueue; #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) -/* We measure latency for request size from <= 4k to >= 1M */ -#define LATENCY_BUCKET_SIZE 9 - -struct latency_bucket { - unsigned long total_latency; /* ns / 1024 */ - int samples; -}; - struct throtl_data { /* service tree for active throtl groups */ @@ -116,9 +108,6 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) return tg->iops[rw]; } -#define request_bucket_index(sectors) \ - clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1) - /** * throtl_log - log debug message via blktrace * @sq: the service_queue being reported diff --git a/block/genhd.c b/block/genhd.c index 4dc95a46350532bbb5ac2639cea5fabb7a1dd563..1c05dd4c6980b5b3970d6150342917ce61150d77 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -663,12 +663,12 @@ void del_gendisk(struct gendisk *disk) */ if (!test_bit(GD_DEAD, &disk->state)) blk_report_disk_dead(disk, false); - __blk_mark_disk_dead(disk); /* * Drop all partitions now that the disk is marked dead. */ mutex_lock(&disk->open_mutex); + __blk_mark_disk_dead(disk); xa_for_each_start(&disk->part_tbl, idx, part, 1) drop_partition(part); mutex_unlock(&disk->open_mutex); diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 2133085deda77f92801488ca59c5ae70b762023f..1c5218b79fc2acb3a6e1616e8b0ce2a9ac357ca7 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -188,13 +188,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, u8 acpi_ns_is_locked); void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function); -void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, - acpi_adr_space_type space_id); - acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index dc6004daf624b6f997cf8b664c7ad0616b05931f..cf53b9535f18e00730781769d1ec54874f440ac7 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,6 +20,10 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ +static void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, + acpi_adr_space_type space_id); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -61,6 +65,7 @@ acpi_status acpi_ev_initialize_op_regions(void) acpi_gbl_default_address_spaces [i])) { acpi_ev_execute_reg_methods(acpi_gbl_root_node, + ACPI_UINT32_MAX, acpi_gbl_default_address_spaces [i], ACPI_REG_CONNECT); } @@ -668,6 +673,7 @@ cleanup1: * FUNCTION: acpi_ev_execute_reg_methods * * PARAMETERS: node - Namespace node for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * function - Passed to _REG: On (1) or Off (0) * @@ -679,7 +685,7 @@ cleanup1: ******************************************************************************/ void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function) { struct acpi_reg_walk_info info; @@ -713,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, * regions and _REG methods. (i.e. handlers must be installed for all * regions of this Space ID before we can run any _REG methods) */ - (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, + (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &info, NULL); @@ -814,7 +820,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -void +static void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 624361a5f34d8652d205c8f6f4d9d3720ff89edd..95f78383bbdba16bab4d0a2022c87780cf6615bb 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -85,7 +85,8 @@ acpi_install_address_space_handler_internal(acpi_handle device, /* Run all _REG methods for this address space */ if (run_reg) { - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id, + ACPI_REG_CONNECT); } unlock_and_exit: @@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * FUNCTION: acpi_execute_reg_methods * * PARAMETERS: device - Handle for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * * RETURN: Status @@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * ******************************************************************************/ acpi_status -acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +acpi_execute_reg_methods(acpi_handle device, u32 max_depth, + acpi_adr_space_type space_id) { struct acpi_namespace_node *node; acpi_status status; @@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, max_depth, space_id, + ACPI_REG_CONNECT); } else { status = AE_BAD_PARAMETER; } @@ -306,57 +310,3 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) - -/******************************************************************************* - * - * FUNCTION: acpi_execute_orphan_reg_method - * - * PARAMETERS: device - Handle for the device - * space_id - The address space ID - * - * RETURN: Status - * - * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI - * device. This is a _REG method that has no corresponding region - * within the device's scope. - * - ******************************************************************************/ -acpi_status -acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) -{ - struct acpi_namespace_node *node; - acpi_status status; - - ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); - - /* Parameter validation */ - - if (!device) { - return_ACPI_STATUS(AE_BAD_PARAMETER); - } - - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Convert and validate the device handle */ - - node = acpi_ns_validate_handle(device); - if (node) { - - /* - * If an "orphan" _REG method is present in the device's scope - * for the given address space ID, run it. - */ - - acpi_ev_execute_orphan_reg_method(node, space_id); - } else { - status = AE_BAD_PARAMETER; - } - - (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - return_ACPI_STATUS(status); -} - -ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 299ec653388ceb8fa3977bb0d88151e3055a1211..38d2f6e6b12b4f7617ce7551deeeb2facb5fc2ed 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { - acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; acpi_status status; acpi_ec_start(ec, false); if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { + acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; + acpi_ec_enter_noirq(ec); status = acpi_install_address_space_handler_no_reg(scope_handle, ACPI_ADR_SPACE_EC, @@ -1506,10 +1507,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, } if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { - acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); - if (scope_handle != ec->handle) - acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); - + acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } @@ -1724,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_device *device) } } +void acpi_ec_register_opregions(struct acpi_device *adev) +{ + if (first_ec && first_ec->handle != adev->handle) + acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC); +} + static acpi_status ec_parse_io_ports(struct acpi_resource *resource, void *context) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 601b670356e50e141b172266cf0d3478978a17c4..aadd4c218b320e27a39e0047c11eded1b888dd73 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); +void acpi_ec_register_opregions(struct acpi_device *adev); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig index 849c2bd820b93e9006fcb8b39a82ab14360f1962..f33194d1e43f4c9ed0fe1db64116b441ca6be5ff 100644 --- a/drivers/acpi/numa/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config ACPI_NUMA - bool "NUMA support" - depends on NUMA - depends on (X86 || ARM64 || LOONGARCH) - default y if ARM64 + def_bool NUMA && !X86 config ACPI_HMAT bool "ACPI Heterogeneous Memory Attribute Table Support" diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index e3f26e71637a22e685263c3073c32f281d1c6dca..44f91f2c6c5d86ff7432387fef9bb1bd663e867e 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -167,6 +167,19 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) } } break; + + case ACPI_SRAT_TYPE_RINTC_AFFINITY: + { + struct acpi_srat_rintc_affinity *p = + (struct acpi_srat_rintc_affinity *)header; + pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", + p->acpi_processor_uid, + p->proximity_domain, + (p->flags & ACPI_SRAT_RINTC_ENABLED) ? + "enabled" : "disabled"); + } + break; + default: pr_warn("Found unsupported SRAT entry (type = 0x%x)\n", header->type); @@ -450,6 +463,21 @@ acpi_parse_gi_affinity(union acpi_subtable_headers *header, } #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ +static int __init +acpi_parse_rintc_affinity(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *rintc_affinity; + + rintc_affinity = (struct acpi_srat_rintc_affinity *)header; + acpi_table_print_srat_entry(&header->common); + + /* let architecture-dependent part to do it */ + acpi_numa_rintc_affinity_init(rintc_affinity); + + return 0; +} + static int __init acpi_parse_srat(struct acpi_table_header *table) { struct acpi_table_srat *srat = (struct acpi_table_srat *)table; @@ -485,7 +513,7 @@ int __init acpi_numa_init(void) /* SRAT: System Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { - struct acpi_subtable_proc srat_proc[4]; + struct acpi_subtable_proc srat_proc[5]; memset(srat_proc, 0, sizeof(srat_proc)); srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; @@ -496,6 +524,8 @@ int __init acpi_numa_init(void) srat_proc[2].handler = acpi_parse_gicc_affinity; srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY; srat_proc[3].handler = acpi_parse_gi_affinity; + srat_proc[4].id = ACPI_SRAT_TYPE_RINTC_AFFINITY; + srat_proc[4].handler = acpi_parse_rintc_affinity; acpi_table_parse_entries_array(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 59771412686ba3613f7a280aeb6803ec2d155df8..22ae7829a9155de65c7154647f0c919f8f050073 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2273,6 +2273,8 @@ static int acpi_bus_attach(struct acpi_device *device, void *first_pass) if (device->handler) goto ok; + acpi_ec_register_opregions(device); + if (!device->flags.initialized) { device->flags.power_manageable = device->power.states[ACPI_STATE_D0].flags.valid; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f26286e3713ec73388c92ecaa579abc361a617a5..905290c98c3cdb60f77cdf7d30afa03af9a73e29 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1044,13 +1044,13 @@ static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, } /* Find the smallest unused descriptor the "slow way" */ -static u32 slow_desc_lookup_olocked(struct binder_proc *proc) +static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset) { struct binder_ref *ref; struct rb_node *n; u32 desc; - desc = 1; + desc = offset; for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); if (ref->data.desc > desc) @@ -1071,21 +1071,18 @@ static int get_ref_desc_olocked(struct binder_proc *proc, u32 *desc) { struct dbitmap *dmap = &proc->dmap; + unsigned int nbits, offset; unsigned long *new, bit; - unsigned int nbits; /* 0 is reserved for the context manager */ - if (node == proc->context->binder_context_mgr_node) { - *desc = 0; - return 0; - } + offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1; if (!dbitmap_enabled(dmap)) { - *desc = slow_desc_lookup_olocked(proc); + *desc = slow_desc_lookup_olocked(proc, offset); return 0; } - if (dbitmap_acquire_first_zero_bit(dmap, &bit) == 0) { + if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) { *desc = bit; return 0; } diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b00961944ab1d266e55ea2f5d476b22318af24a7..b3acbc4174fb1f5af03e81707c120eeb298f9203 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -939,9 +939,9 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) __free_page(alloc->pages[i].page_ptr); page_count++; } - kvfree(alloc->pages); } spin_unlock(&alloc->lock); + kvfree(alloc->pages); if (alloc->mm) mmdrop(alloc->mm); diff --git a/drivers/android/dbitmap.h b/drivers/android/dbitmap.h index b8ac7b4764fd8d4e3cf4fa85de5b01f3d6dbafb3..956f1bd087d1c53b6d5c62d1409411b3eab7e5e6 100644 --- a/drivers/android/dbitmap.h +++ b/drivers/android/dbitmap.h @@ -6,8 +6,7 @@ * * Used by the binder driver to optimize the allocation of the smallest * available descriptor ID. Each bit in the bitmap represents the state - * of an ID, with the exception of BIT(0) which is used exclusively to - * reference binder's context manager. + * of an ID. * * A dbitmap can grow or shrink as needed. This part has been designed * considering that users might need to briefly release their locks in @@ -58,11 +57,7 @@ static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap) if (bit < (dmap->nbits >> 2)) return dmap->nbits >> 1; - /* - * Note that find_last_bit() returns dmap->nbits when no bits - * are set. While this is technically not possible here since - * BIT(0) is always set, this check is left for extra safety. - */ + /* find_last_bit() returns dmap->nbits when no bits are set. */ if (bit == dmap->nbits) return NBITS_MIN; @@ -132,16 +127,17 @@ dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) } /* - * Finds and sets the first zero bit in the bitmap. Upon success @bit + * Finds and sets the next zero bit in the bitmap. Upon success @bit * is populated with the index and 0 is returned. Otherwise, -ENOSPC * is returned to indicate that a dbitmap_grow() is needed. */ static inline int -dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) +dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset, + unsigned long *bit) { unsigned long n; - n = find_first_zero_bit(dmap->map, dmap->nbits); + n = find_next_zero_bit(dmap->map, dmap->nbits, offset); if (n == dmap->nbits) return -ENOSPC; @@ -154,9 +150,7 @@ dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) static inline void dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit) { - /* BIT(0) should always set for the context manager */ - if (bit) - clear_bit(bit, dmap->map); + clear_bit(bit, dmap->map); } static inline int dbitmap_init(struct dbitmap *dmap) @@ -168,8 +162,6 @@ static inline int dbitmap_init(struct dbitmap *dmap) } dmap->nbits = NBITS_MIN; - /* BIT(0) is reserved for the context manager */ - set_bit(0, dmap->map); return 0; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d6f5e25e1ed8940203714e535ea6920f8c680f3e..473e00a58a8b0c1ed67fb808670219f60efbfa6c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -951,8 +951,19 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) &sense_key, &asc, &ascq); ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { - /* ATA PASS-THROUGH INFORMATION AVAILABLE */ - ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D); + /* + * ATA PASS-THROUGH INFORMATION AVAILABLE + * + * Note: we are supposed to call ata_scsi_set_sense(), which + * respects the D_SENSE bit, instead of unconditionally + * generating the sense data in descriptor format. However, + * because hdparm, hddtemp, and udisks incorrectly assume sense + * data in descriptor format, without even looking at the + * RESPONSE CODE field in the returned sense data (to see which + * format the returned sense data is in), we are stuck with + * being bug compatible with older kernels. + */ + scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D); } } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index e7f713cd70d3fd7a413c2568b8dca8f8cc8ba2c4..a876024d8a05f964fb79fb04bd5447b67f28f20c 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) rpp->len += skb->len; if (stat & SAR_RSQE_EPDU) { + unsigned int len, truesize; unsigned char *l1l2; - unsigned int len; l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6); @@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); + truesize = skb->truesize; vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); - if (skb->truesize > SAR_FB_SIZE_3) + if (truesize > SAR_FB_SIZE_3) add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); - else if (skb->truesize > SAR_FB_SIZE_2) + else if (truesize > SAR_FB_SIZE_2) add_rx_skb(card, 2, SAR_FB_SIZE_2, 1); - else if (skb->truesize > SAR_FB_SIZE_1) + else if (truesize > SAR_FB_SIZE_1) add_rx_skb(card, 1, SAR_FB_SIZE_1, 1); else add_rx_skb(card, 0, SAR_FB_SIZE_0, 1); diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 69d2138d7efb59d49ddd8414581615f2378fc914..21545ffba0658f6bca1c71436f18c48674dd7e9d 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -316,7 +316,7 @@ endif # PARPORT_PANEL config PANEL_CHANGE_MESSAGE bool "Change LCD initialization message ?" - depends on CHARLCD + depends on CHARLCD || LINEDISP help This allows you to replace the boot message indicating the kernel version and the driver version with a custom message. This is useful on appliances diff --git a/drivers/auxdisplay/arm-charlcd.c b/drivers/auxdisplay/arm-charlcd.c index 0b1c99cca7334ff14b497f9c95709d1bacff0b9f..a7eae99a48f77d53de52132aed1640d6fd8fbcf5 100644 --- a/drivers/auxdisplay/arm-charlcd.c +++ b/drivers/auxdisplay/arm-charlcd.c @@ -270,7 +270,7 @@ static int __init charlcd_probe(struct platform_device *pdev) struct charlcd *lcd; struct resource *res; - lcd = kzalloc(sizeof(struct charlcd), GFP_KERNEL); + lcd = kzalloc(sizeof(*lcd), GFP_KERNEL); if (!lcd) return -ENOMEM; diff --git a/drivers/auxdisplay/charlcd.h b/drivers/auxdisplay/charlcd.h index eed80063a6d20d0bf67a4ac88daf107ccb4b462e..4d4287209d04c4c8f8e983de25c8c91a313b6370 100644 --- a/drivers/auxdisplay/charlcd.h +++ b/drivers/auxdisplay/charlcd.h @@ -36,6 +36,8 @@ enum charlcd_lines { CHARLCD_LINES_2, }; +struct charlcd_ops; + struct charlcd { const struct charlcd_ops *ops; const unsigned char *char_conv; /* Optional */ diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 7ac0b1b1d5482fc788512bf3eec179fc563f8cd3..025dc6855cb253e45015d9ea95d681ddd6f6b97c 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -230,7 +230,7 @@ static int hd44780_probe(struct platform_device *pdev) if (!lcd) goto fail1; - hd = kzalloc(sizeof(struct hd44780), GFP_KERNEL); + hd = kzalloc(sizeof(*hd), GFP_KERNEL); if (!hd) goto fail2; diff --git a/drivers/auxdisplay/hd44780_common.c b/drivers/auxdisplay/hd44780_common.c index 7cbf375b0fa5ed46ff2bee3a47c21cd0dd4e10b4..4ef87c3118c0902b00859cbb114634134fa47394 100644 --- a/drivers/auxdisplay/hd44780_common.c +++ b/drivers/auxdisplay/hd44780_common.c @@ -366,4 +366,5 @@ struct hd44780_common *hd44780_common_alloc(void) } EXPORT_SYMBOL_GPL(hd44780_common_alloc); +MODULE_DESCRIPTION("Common functions for HD44780 (and compatibles) LCD displays"); MODULE_LICENSE("GPL"); diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index ce987944662c8bee6dc2fd3fcec6faf1bbbaad52..8a7034b41d50e8f0ab1228000bb4b703f277e825 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -483,6 +483,7 @@ static int ht16k33_led_probe(struct device *dev, struct led_classdev *led, led->max_brightness = MAX_BRIGHTNESS; err = devm_led_classdev_register_ext(dev, led, &init_data); + fwnode_handle_put(init_data.fwnode); if (err) dev_err(dev, "Failed to register LED\n"); diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c index e2b546210f8d8d6d20199044699147f307577a2c..731ffdfafc4ed9caa1f87700c2fcae8f012040fe 100644 --- a/drivers/auxdisplay/line-display.c +++ b/drivers/auxdisplay/line-display.c @@ -8,7 +8,9 @@ * Copyright (C) 2021 Glider bv */ +#ifndef CONFIG_PANEL_BOOT_MESSAGE #include +#endif #include #include @@ -312,6 +314,12 @@ static int linedisp_init_map(struct linedisp *linedisp) return 0; } +#ifdef CONFIG_PANEL_BOOT_MESSAGE +#define LINEDISP_INIT_TEXT CONFIG_PANEL_BOOT_MESSAGE +#else +#define LINEDISP_INIT_TEXT "Linux " UTS_RELEASE " " +#endif + /** * linedisp_register - register a character line display * @linedisp: pointer to character line display structure @@ -359,7 +367,7 @@ int linedisp_register(struct linedisp *linedisp, struct device *parent, goto out_del_timer; /* display a default message */ - err = linedisp_display(linedisp, "Linux " UTS_RELEASE " ", -1); + err = linedisp_display(linedisp, LINEDISP_INIT_TEXT, -1); if (err) goto out_del_dev; @@ -388,4 +396,5 @@ void linedisp_unregister(struct linedisp *linedisp) } EXPORT_SYMBOL_NS_GPL(linedisp_unregister, LINEDISP); +MODULE_DESCRIPTION("Character line display core support"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 5b59d133b6af4f514caa241b07d898957ac86489..555aee3ee8e7325e854785e28e39c5dbf91ad9ae 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -445,7 +445,7 @@ static int __init arch_acpi_numa_init(void) ret = acpi_numa_init(); if (ret) { - pr_info("Failed to initialise from firmware\n"); + pr_debug("Failed to initialise from firmware\n"); return ret; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 730cae66607c02530d456b61344d431dc313c031..8c0733d3aad8e96c7d25394cd352cae67d06d3a0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const struct kobject *kobj) static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); + struct device_driver *driver; int retval = 0; /* add device node properties if present */ @@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Synchronize with module_remove_driver() */ + rcu_read_lock(); + driver = READ_ONCE(dev->driver); + if (driver) + add_uevent_var(env, "DRIVER=%s", driver->name); + rcu_read_unlock(); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; diff --git a/drivers/base/module.c b/drivers/base/module.c index 7af224e6914a104744977c0ac09dc2790166f654..f742ad2a21da021c6d86765febe4ccb51f8c548d 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "base.h" static char *make_driver_name(const struct device_driver *drv) @@ -97,6 +98,9 @@ void module_remove_driver(const struct device_driver *drv) if (!drv) return; + /* Synchronize with dev_uevent() */ + synchronize_rcu(); + sysfs_remove_link(&drv->p->kobj, "module"); if (drv->owner) diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index f0df2da6d522e7e3949eb91695addea16b908759..2dea9d259c493a48c23128bea9e1c11e4066ea2e 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -110,7 +110,8 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, struct maple_tree *mt = map->cache; MA_STATE(mas, mt, min, max); unsigned long *entry, *lower, *upper; - unsigned long lower_index, lower_last; + /* initialized to work around false-positive -Wuninitialized warning */ + unsigned long lower_index = 0, lower_last = 0; unsigned long upper_index, upper_last; int ret = 0; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f92673f05c7abc418a78a5b1af17accc80568f7f..a9e49b212341bebe58883949c5d3ef9174ab9253 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3422,6 +3422,7 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) /** * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() * @device: DRBD device. + * @peer_device: Peer DRBD device. * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ @@ -3448,6 +3449,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device, /** * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() * @device: DRBD device. + * @peer_device: Peer DRBD device. * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ @@ -3501,6 +3503,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) * @done: callback to be called after the bitmap IO was performed * @why: Descriptive text of the reason for doing the IO * @flags: Bitmap flags + * @peer_device: Peer DRBD device. * * While IO on the bitmap happens we freeze application IO thus we ensure * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be @@ -3549,6 +3552,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * @io_fn: IO callback to be called when bitmap IO is possible * @why: Descriptive text of the reason for doing the IO * @flags: Bitmap flags + * @peer_device: Peer DRBD device. * * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 008e850555f41a755e6cdc44af09a697f2a36a7e..9c8b19a22c2a805ecd7ce86fff7c2b988192e62f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -362,7 +362,7 @@ enum rbd_watch_state { enum rbd_lock_state { RBD_LOCK_STATE_UNLOCKED, RBD_LOCK_STATE_LOCKED, - RBD_LOCK_STATE_RELEASING, + RBD_LOCK_STATE_QUIESCING, }; /* WatchNotify::ClientId */ @@ -422,7 +422,7 @@ struct rbd_device { struct list_head running_list; struct completion acquire_wait; int acquire_err; - struct completion releasing_wait; + struct completion quiescing_wait; spinlock_t object_map_lock; u8 *object_map; @@ -525,7 +525,7 @@ static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev) lockdep_assert_held(&rbd_dev->lock_rwsem); return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || - rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING; + rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING; } static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) @@ -3457,13 +3457,14 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req) lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); if (!list_empty(&img_req->lock_item)) { + rbd_assert(!list_empty(&rbd_dev->running_list)); list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING && list_empty(&rbd_dev->running_list)); } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) - complete(&rbd_dev->releasing_wait); + complete(&rbd_dev->quiescing_wait); } static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) @@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) if (rbd_lock_add_request(img_req)) return 1; - if (rbd_dev->opts->exclusive) { - WARN_ON(1); /* lock got released? */ - return -EROFS; - } - /* * Note the use of mod_delayed_work() in rbd_acquire_lock() * and cancel_delayed_work() in wake_lock_waiters(). @@ -4181,16 +4177,16 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) /* * Ensure that all in-flight IO is flushed. */ - rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; - rbd_assert(!completion_done(&rbd_dev->releasing_wait)); + rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING; + rbd_assert(!completion_done(&rbd_dev->quiescing_wait)); if (list_empty(&rbd_dev->running_list)) return true; up_write(&rbd_dev->lock_rwsem); - wait_for_completion(&rbd_dev->releasing_wait); + wait_for_completion(&rbd_dev->quiescing_wait); down_write(&rbd_dev->lock_rwsem); - if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) + if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING) return false; rbd_assert(list_empty(&rbd_dev->running_list)); @@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev) rbd_warn(rbd_dev, "failed to update lock cookie: %d", ret); + if (rbd_dev->opts->exclusive) + rbd_warn(rbd_dev, + "temporarily releasing lock on exclusive mapping"); + /* * Lock cookie cannot be updated on older OSDs, so do * a manual release and queue an acquire. @@ -5376,7 +5376,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec) INIT_LIST_HEAD(&rbd_dev->acquiring_list); INIT_LIST_HEAD(&rbd_dev->running_list); init_completion(&rbd_dev->acquire_wait); - init_completion(&rbd_dev->releasing_wait); + init_completion(&rbd_dev->quiescing_wait); spin_lock_init(&rbd_dev->object_map_lock); @@ -6582,11 +6582,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) if (ret) return ret; - /* - * The lock may have been released by now, unless automatic lock - * transitions are disabled. - */ - rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev)); return 0; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6fb799ec0d88c1677721ca22caeb2d7910a557c5..890c08792ba8bc97a6d2c04546cba1585ce6bbde 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -48,6 +48,9 @@ #define UBLK_MINORS (1U << MINORBITS) +/* private ioctl command mirror */ +#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC) + /* All UBLK_F_* have to be included into UBLK_F_ALL */ #define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \ | UBLK_F_URING_CMD_COMP_IN_TASK \ @@ -2903,7 +2906,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, case UBLK_CMD_DEL_DEV: ret = ublk_ctrl_del_dev(&ub, true); break; - case UBLK_U_CMD_DEL_DEV_ASYNC: + case UBLK_CMD_DEL_DEV_ASYNC: ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 90a94a111e67e18d53c0cc98e5e3499055a27840..769fa288179d3969f45148a3ced9ee2e00650d18 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -413,6 +413,7 @@ config BT_ATH3K config BT_MTKSDIO tristate "MediaTek HCI SDIO driver" depends on MMC + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI SDIO driver. @@ -425,6 +426,7 @@ config BT_MTKSDIO config BT_MTKUART tristate "MediaTek HCI UART driver" depends on SERIAL_DEV_BUS + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI UART driver. diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e7a612504ab1d33908521be39790492dd1fb4e40..2ebc970e6573fb2d18498aa1d811e08d1bc0bea2 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -3085,6 +3085,9 @@ static int btintel_setup_combined(struct hci_dev *hdev) btintel_set_dsm_reset_method(hdev, &ver_tlv); err = btintel_bootloader_setup_tlv(hdev, &ver_tlv); + if (err) + goto exit_error; + btintel_register_devcoredump_support(hdev); btintel_print_fseq_info(hdev); break; diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index b7c348687a773f85e7667b7127fd6442c408d8ce..2b7c80043aa2ebd4c525d78e77af423df142d7cc 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -437,6 +437,7 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(btmtk_process_coredump); +#if IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) static void btmtk_usb_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; @@ -1262,7 +1263,8 @@ int btmtk_usb_suspend(struct hci_dev *hdev) struct btmtk_data *btmtk_data = hci_get_priv(hdev); /* Stop urb anchor for iso data transmission */ - usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); + if (test_bit(BTMTK_ISOPKT_RUNNING, &btmtk_data->flags)) + usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); return 0; } @@ -1487,6 +1489,7 @@ int btmtk_usb_shutdown(struct hci_dev *hdev) return 0; } EXPORT_SYMBOL_GPL(btmtk_usb_shutdown); +#endif MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Mark Chen "); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca64666769027acba1ec52bf02dbdecb487c9bdc..45adc1560d949af8aff2ed7cdc717cd9c5c228d5 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2160,7 +2160,7 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); power = qcadev->bt_power; - if (power->pwrseq) { + if (power && power->pwrseq) { pwrseq_power_off(power->pwrseq); set_bit(QCA_BT_OFF, &qca->flags); return; @@ -2187,10 +2187,6 @@ static void qca_power_shutdown(struct hci_uart *hu) } break; - case QCA_QCA6390: - pwrseq_power_off(qcadev->bt_power->pwrseq); - break; - default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2416,11 +2412,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) break; case QCA_QCA6390: - qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, - "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + if (dev_of_node(&serdev->dev)) { + qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, + "bluetooth"); + if (IS_ERR(qcadev->bt_power->pwrseq)) + return PTR_ERR(qcadev->bt_power->pwrseq); + break; + } + fallthrough; default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 8767e04d6c89738b21f1fbf091bd8f77f1c431ae..2b59ef61dda23b77243693be2c33bc02effdb2a5 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2291,11 +2291,9 @@ static int sysc_init_idlemode(struct sysc *ddata, u8 *idlemodes, const char *name) { struct device_node *np = ddata->dev->of_node; - struct property *prop; - const __be32 *p; u32 val; - of_property_for_each_u32(np, name, prop, p, val) { + of_property_for_each_u32(np, name, val) { if (val >= SYSC_NR_IDLEMODES) { dev_err(ddata->dev, "invalid idlemode: %i\n", val); return -EINVAL; diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig index 94abd8f632a7b3eb02b7e387402cee431009ee8a..db51386c663a6640dad549ce427c02c19d0bd62e 100644 --- a/drivers/cache/Kconfig +++ b/drivers/cache/Kconfig @@ -18,6 +18,7 @@ config STARFIVE_STARLINK_CACHE bool "StarFive StarLink Cache controller" depends on RISCV depends on ARCH_STARFIVE + depends on 64BIT select RISCV_DMA_NONCOHERENT select RISCV_NONSTANDARD_CACHE_OPS help diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index cf89a9631107dbd54561c222003033d49c3bd41c..a4f4291b4492cbc3f4b2fb80ef3b4b3b93bf554b 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -421,4 +421,5 @@ static void __exit ds1620_exit(void) module_init(ds1620_init); module_exit(ds1620_exit); +MODULE_DESCRIPTION("Dallas Semiconductor DS1620 thermometer driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index ea378c0ed549868e50050abc50ef2dac7be20d4f..92cee5717237b77899e92cf4bd6de834d98a590b 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -241,6 +241,7 @@ static void __exit nwbutton_exit (void) MODULE_AUTHOR("Alex Holden"); +MODULE_DESCRIPTION("NetWinder button driver"); MODULE_LICENSE("GPL"); module_init(nwbutton_init); diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index 0973c2c2b01a48aa9bf4cd1084951950318fe5c2..9f52f0306ef71f691e451bcf63bbb2b5794950d0 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -618,6 +618,7 @@ static void __exit nwflash_exit(void) iounmap((void *)FLASH_BASE); } +MODULE_DESCRIPTION("NetWinder flash memory driver"); MODULE_LICENSE("GPL"); module_param(flashdebug, bool, 0644); diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 5a5afa14ca8cb8267e27a26d226fb7d1c3515c11..45771b1a3716a2ae3fc3c0d558d757841ff0df39 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2"); static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; +static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb @@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kref) * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). - * - * The fact that wakeup_all() and bulk_in_work() are queued on the same - * workqueue makes their concurrent execution very unlikely, however the - * kernel's API doesn't seem to ensure this strictly. */ static void wakeup_all(struct work_struct *work) @@ -627,7 +624,7 @@ static void report_io_error(struct xillyusb_dev *xdev, if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ - queue_work(xdev->workq, &xdev->wakeup_workitem); + queue_work(wakeup_wq, &xdev->wakeup_workitem); } } @@ -1906,6 +1903,13 @@ static const struct file_operations xillyusb_fops = { static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { + struct usb_device *udev = xdev->udev; + + /* Verify that device has the two fundamental bulk in/out endpoints */ + if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || + usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) + return -ENODEV; + xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) @@ -1935,14 +1939,15 @@ static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { - struct xillyusb_channel *chan; + struct usb_device *udev = xdev->udev; + struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - xdev->channels = chan; + new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); @@ -1971,6 +1976,15 @@ static int setup_channels(struct xillyusb_dev *xdev, */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ + if (usb_pipe_type_check(udev, + usb_sndbulkpipe(udev, i + 2))) { + dev_err(xdev->dev, + "Missing BULK OUT endpoint %d\n", + i + 2); + kfree(new_channels); + return -ENODEV; + } + chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); @@ -1980,6 +1994,7 @@ static int setup_channels(struct xillyusb_dev *xdev, } } + xdev->channels = new_channels; return 0; } @@ -2096,9 +2111,11 @@ static int xillyusb_discovery(struct usb_interface *interface) * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the - * workqueue, which rarely does anything. + * device's workqueue and the wakeup work item. This rarely + * does anything. */ flush_workqueue(xdev->workq); + flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; @@ -2258,6 +2275,10 @@ static int __init xillyusb_init(void) { int rc = 0; + wakeup_wq = alloc_workqueue(xillyname, 0, 0); + if (!wakeup_wq) + return -ENOMEM; + if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else @@ -2265,12 +2286,17 @@ static int __init xillyusb_init(void) rc = usb_register(&xillyusb_driver); + if (rc) + destroy_workqueue(wakeup_wq); + return rc; } static void __exit xillyusb_exit(void) { usb_deregister(&xillyusb_driver); + + destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); diff --git a/drivers/clk/clk-conf.c b/drivers/clk/clk-conf.c index 1a4e6340f95ce4c276bc7df61c7c2590e0ad1d99..058420562020791865e69cfce3ee4771bab8cd93 100644 --- a/drivers/clk/clk-conf.c +++ b/drivers/clk/clk-conf.c @@ -81,13 +81,11 @@ err: static int __set_clk_rates(struct device_node *node, bool clk_supplier) { struct of_phandle_args clkspec; - struct property *prop; - const __be32 *cur; int rc, index = 0; struct clk *clk; u32 rate; - of_property_for_each_u32(node, "assigned-clock-rates", prop, cur, rate) { + of_property_for_each_u32(node, "assigned-clock-rates", rate) { if (rate) { rc = of_parse_phandle_with_args(node, "assigned-clocks", "#clock-cells", index, &clkspec); diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c index 4ce83c5265b878b97aee9eedb669a44d4153dfb8..a4c92c5ef3ff749b5a07c8b86ee783307c73066d 100644 --- a/drivers/clk/clk-si5351.c +++ b/drivers/clk/clk-si5351.c @@ -1175,8 +1175,8 @@ static int si5351_dt_parse(struct i2c_client *client, { struct device_node *child, *np = client->dev.of_node; struct si5351_platform_data *pdata; - struct property *prop; - const __be32 *p; + u32 array[4]; + int sz, i; int num = 0; u32 val; @@ -1191,20 +1191,24 @@ static int si5351_dt_parse(struct i2c_client *client, * property silabs,pll-source : , [<..>] * allow to selectively set pll source */ - of_property_for_each_u32(np, "silabs,pll-source", prop, p, num) { + sz = of_property_read_variable_u32_array(np, "silabs,pll-source", array, 2, 4); + sz = (sz == -EINVAL) ? 0 : sz; /* Missing property is OK */ + if (sz < 0) + return dev_err_probe(&client->dev, sz, "invalid pll-source\n"); + if (sz % 2) + return dev_err_probe(&client->dev, -EINVAL, + "missing pll-source for pll %d\n", array[sz - 1]); + + for (i = 0; i < sz; i += 2) { + num = array[i]; + val = array[i + 1]; + if (num >= 2) { dev_err(&client->dev, "invalid pll %d on pll-source prop\n", num); return -EINVAL; } - p = of_prop_next_u32(prop, p, &val); - if (!p) { - dev_err(&client->dev, - "missing pll-source for pll %d\n", num); - return -EINVAL; - } - switch (val) { case 0: pdata->pll_src[num] = SI5351_PLL_SRC_XTAL; @@ -1232,19 +1236,24 @@ static int si5351_dt_parse(struct i2c_client *client, pdata->pll_reset[0] = true; pdata->pll_reset[1] = true; - of_property_for_each_u32(np, "silabs,pll-reset-mode", prop, p, num) { + sz = of_property_read_variable_u32_array(np, "silabs,pll-reset-mode", array, 2, 4); + sz = (sz == -EINVAL) ? 0 : sz; /* Missing property is OK */ + if (sz < 0) + return dev_err_probe(&client->dev, sz, "invalid pll-reset-mode\n"); + if (sz % 2) + return dev_err_probe(&client->dev, -EINVAL, + "missing pll-reset-mode for pll %d\n", array[sz - 1]); + + for (i = 0; i < sz; i += 2) { + num = array[i]; + val = array[i + 1]; + if (num >= 2) { dev_err(&client->dev, "invalid pll %d on pll-reset-mode prop\n", num); return -EINVAL; } - p = of_prop_next_u32(prop, p, &val); - if (!p) { - dev_err(&client->dev, - "missing pll-reset-mode for pll %d\n", num); - return -EINVAL; - } switch (val) { case 0: diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8cca52be993f4c831660f0c0d13bac290a37e4de..285ed1ad8a37807e9f1f8a77b53c4cc996a8c27f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -5364,9 +5364,8 @@ EXPORT_SYMBOL_GPL(of_clk_get_parent_count); const char *of_clk_get_parent_name(const struct device_node *np, int index) { struct of_phandle_args clkspec; - struct property *prop; const char *clk_name; - const __be32 *vp; + bool found = false; u32 pv; int rc; int count; @@ -5383,15 +5382,16 @@ const char *of_clk_get_parent_name(const struct device_node *np, int index) /* if there is an indices property, use it to transfer the index * specified into an array offset for the clock-output-names property. */ - of_property_for_each_u32(clkspec.np, "clock-indices", prop, vp, pv) { + of_property_for_each_u32(clkspec.np, "clock-indices", pv) { if (index == pv) { index = count; + found = true; break; } count++; } /* We went off the end of 'clock-indices' without finding it */ - if (prop && !vp) + if (of_property_present(clkspec.np, "clock-indices") && !found) return NULL; if (of_property_read_string_index(clkspec.np, "clock-output-names", @@ -5504,14 +5504,12 @@ static int parent_ready(struct device_node *np) int of_clk_detect_critical(struct device_node *np, int index, unsigned long *flags) { - struct property *prop; - const __be32 *cur; uint32_t idx; if (!np || !flags) return -EINVAL; - of_property_for_each_u32(np, "clock-critical", prop, cur, idx) + of_property_for_each_u32(np, "clock-critical", idx) if (index == idx) *flags |= CLK_IS_CRITICAL; diff --git a/drivers/clk/davinci/da8xx-cfgchip.c b/drivers/clk/davinci/da8xx-cfgchip.c index ad2d0df43dc6ff520fc146ae342fb5227de2e898..ec60ecb517f1f9939acc38386dfaab7da27f4bac 100644 --- a/drivers/clk/davinci/da8xx-cfgchip.c +++ b/drivers/clk/davinci/da8xx-cfgchip.c @@ -508,7 +508,7 @@ da8xx_cfgchip_register_usb0_clk48(struct device *dev, const char * const parent_names[] = { "usb_refclkin", "pll0_auxclk" }; struct clk *fck_clk; struct da8xx_usb0_clk48 *usb0; - struct clk_init_data init; + struct clk_init_data init = {}; int ret; fck_clk = devm_clk_get(dev, "fck"); @@ -583,7 +583,7 @@ da8xx_cfgchip_register_usb1_clk48(struct device *dev, { const char * const parent_names[] = { "usb0_clk48", "usb_refclkin" }; struct da8xx_usb1_clk48 *usb1; - struct clk_init_data init; + struct clk_init_data init = {}; int ret; usb1 = devm_kzalloc(dev, sizeof(*usb1), GFP_KERNEL); diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index ea3788ba46f7e6e18a3a39070410318f5b60ded5..33cc1f73c69d1f875a193aea0552902268dc8716 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -227,11 +227,9 @@ EXPORT_SYMBOL_GPL(qcom_cc_register_sleep_clk); static void qcom_cc_drop_protected(struct device *dev, struct qcom_cc *cc) { struct device_node *np = dev->of_node; - struct property *prop; - const __be32 *p; u32 i; - of_property_for_each_u32(np, "protected-clocks", prop, p, i) { + of_property_for_each_u32(np, "protected-clocks", i) { if (i >= cc->num_rclks) continue; diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index a026ccca7315f1c403daa9e1201c8536e8a83ae7..28945b6b0ee1c1e3908349fa9f071d3845057eaf 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -1040,19 +1040,20 @@ static unsigned long __init exynos4_get_xom(void) static void __init exynos4_clk_register_finpll(struct samsung_clk_provider *ctx) { struct samsung_fixed_rate_clock fclk; - struct clk *clk; - unsigned long finpll_f = 24000000; + unsigned long finpll_f; + unsigned int parent; char *parent_name; unsigned int xom = exynos4_get_xom(); parent_name = xom & 1 ? "xusbxti" : "xxti"; - clk = clk_get(NULL, parent_name); - if (IS_ERR(clk)) { + parent = xom & 1 ? CLK_XUSBXTI : CLK_XXTI; + + finpll_f = clk_hw_get_rate(ctx->clk_data.hws[parent]); + if (!finpll_f) { pr_err("%s: failed to lookup parent clock %s, assuming " "fin_pll clock frequency is 24MHz\n", __func__, parent_name); - } else { - finpll_f = clk_get_rate(clk); + finpll_f = 24000000; } fclk.id = CLK_FIN_PLL; diff --git a/drivers/clk/sophgo/clk-sg2042-pll.c b/drivers/clk/sophgo/clk-sg2042-pll.c index 9695e64fc23b975721f7ed01f00473e53039b8a6..ff9deeef509b8f1d6a565094e48285d59a54cccb 100644 --- a/drivers/clk/sophgo/clk-sg2042-pll.c +++ b/drivers/clk/sophgo/clk-sg2042-pll.c @@ -387,7 +387,7 @@ static int sg2042_clk_pll_set_rate(struct clk_hw *hw, struct sg2042_pll_clock *pll = to_sg2042_pll_clk(hw); struct sg2042_pll_ctrl pctrl_table; unsigned long flags; - u32 value; + u32 value = 0; int ret; spin_lock_irqsave(pll->lock, flags); diff --git a/drivers/clk/sunxi/clk-simple-gates.c b/drivers/clk/sunxi/clk-simple-gates.c index 0399627c226a68838a82ba219d497fbf4e2c7a5e..845efc1ec800b990d216e93e39a716eb890f0f2b 100644 --- a/drivers/clk/sunxi/clk-simple-gates.c +++ b/drivers/clk/sunxi/clk-simple-gates.c @@ -21,11 +21,9 @@ static void __init sunxi_simple_gates_setup(struct device_node *node, { struct clk_onecell_data *clk_data; const char *clk_parent, *clk_name; - struct property *prop; struct resource res; void __iomem *clk_reg; void __iomem *reg; - const __be32 *p; int number, i = 0, j; u8 clk_bit; u32 index; @@ -47,7 +45,7 @@ static void __init sunxi_simple_gates_setup(struct device_node *node, if (!clk_data->clks) goto err_free_data; - of_property_for_each_u32(node, "clock-indices", prop, p, index) { + of_property_for_each_u32(node, "clock-indices", index) { of_property_read_string_index(node, "clock-output-names", i, &clk_name); diff --git a/drivers/clk/sunxi/clk-sun8i-bus-gates.c b/drivers/clk/sunxi/clk-sun8i-bus-gates.c index b87f331f63c9e882e81e21285a0a181a0a69038d..8482ac8e5898a09dcd72f6ae21790c097077095d 100644 --- a/drivers/clk/sunxi/clk-sun8i-bus-gates.c +++ b/drivers/clk/sunxi/clk-sun8i-bus-gates.c @@ -24,11 +24,9 @@ static void __init sun8i_h3_bus_gates_init(struct device_node *node) const char *parents[PARENT_MAX]; struct clk_onecell_data *clk_data; const char *clk_name; - struct property *prop; struct resource res; void __iomem *clk_reg; void __iomem *reg; - const __be32 *p; int number, i; u8 clk_bit; int index; @@ -58,7 +56,7 @@ static void __init sun8i_h3_bus_gates_init(struct device_node *node) goto err_free_data; i = 0; - of_property_for_each_u32(node, "clock-indices", prop, p, index) { + of_property_for_each_u32(node, "clock-indices", index) { of_property_read_string_index(node, "clock-output-names", i, &clk_name); diff --git a/drivers/clk/thead/Kconfig b/drivers/clk/thead/Kconfig index 1710d50bf9d4650ae10dd08ed4781ea004512921..95e0d9eb965eb490d539989a4a2d1cbfa282054f 100644 --- a/drivers/clk/thead/Kconfig +++ b/drivers/clk/thead/Kconfig @@ -3,6 +3,7 @@ config CLK_THEAD_TH1520_AP bool "T-HEAD TH1520 AP clock support" depends on ARCH_THEAD || COMPILE_TEST + depends on 64BIT default ARCH_THEAD select REGMAP_MMIO help diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index cbc176b27c091e3fa378fb8750771396fcd1a882..17e32ae08720cb96554cd0d2837dd7821b0a56fd 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -738,7 +738,7 @@ static struct ccu_div vp_axi_clk = { .hw.init = CLK_HW_INIT_PARENTS_HW("vp-axi", video_pll_clk_parent, &ccu_div_ops, - 0), + CLK_IGNORE_UNUSED), }, }; diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 6e46781bc9acff9e8fd4f4bfcff27d5d7d7b0eb3..b9561e3f196c42d2b5bae7d56a94b0ee87a47a5a 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -418,8 +418,6 @@ void __init samsung_pwm_clocksource_init(void __iomem *base, static int __init samsung_pwm_alloc(struct device_node *np, const struct samsung_pwm_variant *variant) { - struct property *prop; - const __be32 *cur; u32 val; int i, ret; @@ -427,7 +425,7 @@ static int __init samsung_pwm_alloc(struct device_node *np, for (i = 0; i < SAMSUNG_PWM_NUM; ++i) pwm.irq[i] = irq_of_parse_and_map(np, i); - of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) { + of_property_for_each_u32(np, "samsung,pwm-outputs", val) { if (val >= SAMSUNG_PWM_NUM) { pr_warn("%s: invalid channel index in samsung,pwm-outputs property\n", __func__); continue; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 392a8000b238b52fce006f9c325439be930dc147..c0278d023cfce595b7cd9f8bb72df60477c8792c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3405,6 +3405,7 @@ static const struct x86_cpu_id intel_epp_default[] = { */ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, 179, 64, 16)), X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 571069863c62946c8341a1c98810fd0325c69e95..82b78e331d8ed204ff31bd7c8fe118fd5a0382a3 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -22,56 +22,42 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); -/* - * Find a targets entry (n) in the host bridge interleave list. - * CXL Specification 3.0 Table 9-22 - */ -static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, - int ig) -{ - int i = 0, n = 0; - u8 eiw; - - /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ - if (iw != 3) { - for (i = 0; i < cximsd->nr_maps; i++) - n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; - } - /* IW: 3,6,12 add a modulo calculation to 'n' */ - if (!is_power_of_2(iw)) { - if (ways_to_eiw(iw, &eiw)) - return -1; - hpa &= GENMASK_ULL(51, eiw + ig); - n |= do_div(hpa, 3) << i; - } - return n; -} -static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) +static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; - struct cxl_decoder *cxld = &cxlsd->cxld; - int ig = cxld->interleave_granularity; - int iw = cxld->interleave_ways; - int n = 0; - u64 hpa; - - if (dev_WARN_ONCE(&cxld->dev, - cxld->interleave_ways != cxlsd->nr_targets, - "misconfigured root decoder\n")) - return NULL; + int hbiw = cxlrd->cxlsd.nr_targets; + u64 val; + int pos; - hpa = cxlrd->res->start + pos * ig; + /* No xormaps for host bridge interleave ways of 1 or 3 */ + if (hbiw == 1 || hbiw == 3) + return hpa; - /* Entry (n) is 0 for no interleave (iw == 1) */ - if (iw != 1) - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); + /* + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore + * the position bit to its value before the xormap was applied at + * HPA->DPA translation. + * + * pos is the lowest set bit in an XORMAP + * val is the XORALLBITS(HPA & XORMAP) + * + * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS + * as an operation that outputs a single bit by XORing all the + * bits in the input (hpa & xormap). Implement XORALLBITS using + * hweight64(). If the hamming weight is even the XOR of those + * bits results in val==0, if odd the XOR result is val==1. + */ - if (n < 0) - return NULL; + for (int i = 0; i < cximsd->nr_maps; i++) { + if (!cximsd->xormaps[i]) + continue; + pos = __ffs(cximsd->xormaps[i]); + val = (hweight64(hpa & cximsd->xormaps[i]) & 1); + hpa = (hpa & ~(1ULL << pos)) | (val << pos); + } - return cxlrd->cxlsd.target[n]; + return hpa; } struct cxl_cxims_context { @@ -361,7 +347,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; - cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; int rc; @@ -389,13 +374,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, if (rc) return rc; - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) - cxl_calc_hb = cxl_hb_modulo; - else - cxl_calc_hb = cxl_hb_xor; - struct cxl_root_decoder *cxlrd __free(put_cxlrd) = - cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); + cxl_root_decoder_alloc(root_port, ways); + if (IS_ERR(cxlrd)) return PTR_ERR(cxlrd); @@ -434,6 +415,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->qos_class = cfmws->qtg_id; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) + cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + rc = cxl_decoder_add(cxld, target_map); if (rc) return rc; @@ -482,6 +466,8 @@ struct cxl_chbs_context { unsigned long long uid; resource_size_t base; u32 cxl_version; + int nr_versions; + u32 saved_version; }; static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg, @@ -490,22 +476,31 @@ static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg, struct cxl_chbs_context *ctx = arg; struct acpi_cedt_chbs *chbs; - if (ctx->base != CXL_RESOURCE_NONE) - return 0; - chbs = (struct acpi_cedt_chbs *) header; - if (ctx->uid != chbs->uid) + if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11 && + chbs->length != CXL_RCRB_SIZE) return 0; - ctx->cxl_version = chbs->cxl_version; if (!chbs->base) return 0; - if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11 && - chbs->length != CXL_RCRB_SIZE) + if (ctx->saved_version != chbs->cxl_version) { + /* + * cxl_version cannot be overwritten before the next two + * checks, then use saved_version + */ + ctx->saved_version = chbs->cxl_version; + ctx->nr_versions++; + } + + if (ctx->base != CXL_RESOURCE_NONE) return 0; + if (ctx->uid != chbs->uid) + return 0; + + ctx->cxl_version = chbs->cxl_version; ctx->base = chbs->base; return 0; @@ -529,10 +524,19 @@ static int cxl_get_chbs(struct device *dev, struct acpi_device *hb, .uid = uid, .base = CXL_RESOURCE_NONE, .cxl_version = UINT_MAX, + .saved_version = UINT_MAX, }; acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs_iter, ctx); + if (ctx->nr_versions > 1) { + /* + * Disclaim eRCD support given some component register may + * only be found via CHBCR + */ + dev_info(dev, "Unsupported platform config, mixed Virtual Host and Restricted CXL Host hierarchy."); + } + return 0; } @@ -921,6 +925,7 @@ static void __exit cxl_acpi_exit(void) /* load before dax_hmem sees 'Soft Reserved' CXL ranges */ subsys_initcall(cxl_acpi_init); module_exit(cxl_acpi_exit); +MODULE_DESCRIPTION("CXL ACPI: Platform Support"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_IMPORT_NS(ACPI); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 625394486459ee87f842b34b1e10f3c5e3f2996a..72a506c9dbd0a8390fe1edc6fdfcd028a7a3ca4e 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -28,12 +28,12 @@ int cxl_region_init(void); void cxl_region_exit(void); int cxl_get_poison_by_endpoint(struct cxl_port *port); struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa); -u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa); +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa); #else -static inline u64 -cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) +static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, + const struct cxl_memdev *cxlmd, u64 dpa) { return ULLONG_MAX; } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2626f3fff201d849c185bd6637600527321e0a33..e5cdeafdf76e7dbcc117ccd9af949d34526804aa 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -875,10 +875,10 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, guard(rwsem_read)(&cxl_region_rwsem); guard(rwsem_read)(&cxl_dpa_rwsem); - dpa = le64_to_cpu(evt->common.phys_addr) & CXL_DPA_MASK; + dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - hpa = cxl_trace_hpa(cxlr, cxlmd, dpa); + hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); if (event_type == CXL_CPER_EVENT_GEN_MEDIA) trace_cxl_general_media(cxlmd, type, cxlr, hpa, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 8567dd11eaac7450b56da8883c33d1990fb0daa2..51132a575b2766a6409124e894d340903bff64bb 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -338,10 +338,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, if (rc) return rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; @@ -368,6 +364,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -834,11 +834,13 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) { struct device *dport_dev = dport->dport_dev; - struct pci_host_bridge *host_bridge; - host_bridge = to_pci_host_bridge(dport_dev); - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); + + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + } dport->reg_map.host = host; cxl_dport_map_regs(dport); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index cb730050d3d42d1331f1499f8c147237964fcea3..1d5007e3795a3c107cfebee079f550188a715781 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1733,21 +1733,6 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, return 0; } -struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) -{ - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; - struct cxl_decoder *cxld = &cxlsd->cxld; - int iw; - - iw = cxld->interleave_ways; - if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets, - "misconfigured root decoder\n")) - return NULL; - - return cxlrd->cxlsd.target[pos % iw]; -} -EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL); - static struct lock_class_key cxl_decoder_key; /** @@ -1807,7 +1792,6 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * cxl_root_decoder_alloc - Allocate a root level decoder * @port: owning CXL root of this decoder * @nr_targets: static number of downstream targets - * @calc_hb: which host bridge covers the n'th position by granularity * * Return: A new cxl decoder to be registered by cxl_decoder_add(). A * 'CXL root' decoder is one that decodes from a top-level / static platform @@ -1815,8 +1799,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * topology. */ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets, - cxl_calc_hb_fn calc_hb) + unsigned int nr_targets) { struct cxl_root_decoder *cxlrd; struct cxl_switch_decoder *cxlsd; @@ -1838,7 +1821,6 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, return ERR_PTR(rc); } - cxlrd->calc_hb = calc_hb; mutex_init(&cxlrd->range_lock); cxld = &cxlsd->cxld; @@ -2356,5 +2338,6 @@ static void cxl_core_exit(void) subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); +MODULE_DESCRIPTION("CXL: Core Compute Express Link support"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 538ebd5a64fd960eda6b29addbe98c9e3edc7595..21ad5f24287581e179b00d6f651a47a90a12f720 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -1632,10 +1633,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr, const struct cxl_dport *dport, int pos) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxl_decoder *cxld = &cxlsd->cxld; + int iw = cxld->interleave_ways; struct cxl_port *iter; int rc; - if (cxlrd->calc_hb(cxlrd, pos) != dport) { + if (dport != cxlrd->cxlsd.target[pos % iw]) { dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), dev_name(&cxlrd->cxlsd.cxld.dev)); @@ -2310,6 +2314,7 @@ static void unregister_region(void *_cxlr) int i; unregister_memory_notifier(&cxlr->memory_notifier); + unregister_mt_adistance_algorithm(&cxlr->adist_notifier); device_del(&cxlr->dev); /* @@ -2386,14 +2391,23 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) return true; } +static int cxl_region_nid(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct resource *res; + + guard(rwsem_read)(&cxl_region_rwsem); + res = p->res; + if (!res) + return NUMA_NO_NODE; + return phys_to_target_node(res->start); +} + static int cxl_region_perf_attrs_callback(struct notifier_block *nb, unsigned long action, void *arg) { struct cxl_region *cxlr = container_of(nb, struct cxl_region, memory_notifier); - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = p->targets[0]; - struct cxl_decoder *cxld = &cxled->cxld; struct memory_notify *mnb = arg; int nid = mnb->status_change_nid; int region_nid; @@ -2401,7 +2415,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid == NUMA_NO_NODE || action != MEM_ONLINE) return NOTIFY_DONE; - region_nid = phys_to_target_node(cxld->hpa_range.start); + region_nid = cxl_region_nid(cxlr); if (nid != region_nid) return NOTIFY_DONE; @@ -2411,6 +2425,27 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, return NOTIFY_OK; } +static int cxl_region_calculate_adistance(struct notifier_block *nb, + unsigned long nid, void *data) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + adist_notifier); + struct access_coordinate *perf; + int *adist = data; + int region_nid; + + region_nid = cxl_region_nid(cxlr); + if (nid != region_nid) + return NOTIFY_OK; + + perf = &cxlr->coord[ACCESS_COORDINATE_CPU]; + + if (mt_perf_to_adistance(perf, adist)) + return NOTIFY_OK; + + return NOTIFY_STOP; +} + /** * devm_cxl_add_region - Adds a region to a decoder * @cxlrd: root decoder @@ -2453,6 +2488,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; register_memory_notifier(&cxlr->memory_notifier); + cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance; + cxlr->adist_notifier.priority = 100; + register_mt_adistance_algorithm(&cxlr->adist_notifier); + rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); if (rc) return ERR_PTR(rc); @@ -2816,20 +2855,13 @@ struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) return ctx.cxlr; } -static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) +static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) { struct cxl_region_params *p = &cxlr->params; int gran = p->interleave_granularity; int ways = p->interleave_ways; u64 offset; - /* Is the hpa within this region at all */ - if (hpa < p->res->start || hpa > p->res->end) { - dev_dbg(&cxlr->dev, - "Addr trans fail: hpa 0x%llx not in region\n", hpa); - return false; - } - /* Is the hpa in an expected chunk for its pos(-ition) */ offset = hpa - p->res->start; offset = do_div(offset, gran * ways); @@ -2842,15 +2874,26 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) return false; } -static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled) +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; struct cxl_region_params *p = &cxlr->params; - int pos = cxled->pos; + struct cxl_endpoint_decoder *cxled = NULL; u16 eig = 0; u8 eiw = 0; + int pos; + + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxlmd == cxled_to_memdev(cxled)) + break; + } + if (!cxled || cxlmd != cxled_to_memdev(cxled)) + return ULLONG_MAX; + pos = cxled->pos; ways_to_eiw(p->interleave_ways, &eiw); granularity_to_eig(p->interleave_granularity, &eig); @@ -2884,27 +2927,21 @@ static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, /* Apply the hpa_offset to the region base address */ hpa = hpa_offset + p->res->start; - if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos)) - return ULLONG_MAX; + /* Root decoder translation overrides typical modulo decode */ + if (cxlrd->hpa_to_spa) + hpa = cxlrd->hpa_to_spa(cxlrd, hpa); - return hpa; -} - -u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa) -{ - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = NULL; - - for (int i = 0; i < p->nr_targets; i++) { - cxled = p->targets[i]; - if (cxlmd == cxled_to_memdev(cxled)) - break; + if (hpa < p->res->start || hpa > p->res->end) { + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in region\n", hpa); + return ULLONG_MAX; } - if (!cxled || cxlmd != cxled_to_memdev(cxled)) + + /* Simple chunk check, by pos & gran, only applies to modulo decodes */ + if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) return ULLONG_MAX; - return cxl_dpa_to_hpa(dpa, cxlr, cxled); + return hpa; } static struct lock_class_key cxl_pmem_region_key; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index ee5cd4eb2f16ccfdb9163caa772eb91b3daf6caf..9167cfba7f592cfdf2f13b87cbb4ed779eb48266 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -340,23 +340,23 @@ TRACE_EVENT(cxl_general_media, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr); __entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID; /* General Media */ - __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr); __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; /* Mask after flags have been parsed */ __entry->dpa &= CXL_DPA_MASK; - __entry->descriptor = rec->descriptor; - __entry->type = rec->type; - __entry->transaction_type = rec->transaction_type; - __entry->channel = rec->channel; - __entry->rank = rec->rank; + __entry->descriptor = rec->media_hdr.descriptor; + __entry->type = rec->media_hdr.type; + __entry->transaction_type = rec->media_hdr.transaction_type; + __entry->channel = rec->media_hdr.channel; + __entry->rank = rec->media_hdr.rank; __entry->device = get_unaligned_le24(rec->device); memcpy(__entry->comp_id, &rec->component_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE); - __entry->validity_flags = get_unaligned_le16(&rec->validity_flags); + __entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags); __entry->hpa = hpa; if (cxlr) { __assign_str(region_name); @@ -440,19 +440,19 @@ TRACE_EVENT(cxl_dram, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr); __entry->hdr_uuid = CXL_EVENT_DRAM_UUID; /* DRAM */ - __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr); __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; __entry->dpa &= CXL_DPA_MASK; - __entry->descriptor = rec->descriptor; - __entry->type = rec->type; - __entry->transaction_type = rec->transaction_type; - __entry->validity_flags = get_unaligned_le16(rec->validity_flags); - __entry->channel = rec->channel; - __entry->rank = rec->rank; + __entry->descriptor = rec->media_hdr.descriptor; + __entry->type = rec->media_hdr.type; + __entry->transaction_type = rec->media_hdr.transaction_type; + __entry->validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags); + __entry->channel = rec->media_hdr.channel; + __entry->rank = rec->media_hdr.rank; __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); __entry->bank_group = rec->bank_group; __entry->bank = rec->bank; @@ -704,8 +704,8 @@ TRACE_EVENT(cxl_poison, if (cxlr) { __assign_str(region); memcpy(__entry->uuid, &cxlr->params.uuid, 16); - __entry->hpa = cxl_trace_hpa(cxlr, cxlmd, - __entry->dpa); + __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, + __entry->dpa); } else { __assign_str(region); memset(__entry->uuid, 0, 16); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 720aa07976b04dd47ed3a374d7fa051f7dd082f0..9afb407d438faea6f845a39459447810212f951f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -434,14 +434,13 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, - int pos); +typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @region_id: region id for next region provisioning event - * @calc_hb: which host bridge covers the n'th position by granularity + * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie @@ -450,7 +449,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, struct cxl_root_decoder { struct resource *res; atomic_t region_id; - cxl_calc_hb_fn calc_hb; + cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; @@ -524,6 +523,7 @@ struct cxl_region_params { * @params: active + config params for the region * @coord: QoS access coordinates for the region * @memory_notifier: notifier for setting the access coordinates to node + * @adist_notifier: notifier for calculating the abstract distance of node */ struct cxl_region { struct device dev; @@ -536,6 +536,7 @@ struct cxl_region { struct cxl_region_params params; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; struct notifier_block memory_notifier; + struct notifier_block adist_notifier; }; struct cxl_nvdimm_bridge { @@ -774,9 +775,7 @@ bool is_root_decoder(struct device *dev); bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets, - cxl_calc_hb_fn calc_hb); -struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos); + unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index af8169ccdbc05534b85ba768c78dfcd7ecb46c88..afb53d058d6289271fe92e92c0f1f610d08d2739 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -161,7 +161,7 @@ struct cxl_mbox_cmd { C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ C(PADDR, -EFAULT, "physical address specified is invalid"), \ - C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(POISONLMT, -EBUSY, "poison injection limit has been reached"), \ C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ C(ABORT, -ENXIO, "background cmd was aborted by device"), \ C(SECURITY, -ENXIO, "not valid in the current security state"), \ @@ -563,7 +563,7 @@ enum cxl_opcode { 0x3b, 0x3f, 0x17) #define DEFINE_CXL_VENDOR_DEBUG_UUID \ - UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \ + UUID_INIT(0x5e1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \ 0x40, 0x3d, 0x86) struct cxl_mbox_get_supported_logs { diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 2f1b49bfe162fd32c8bbda30421875e201004fda..7de232eaeb1747a928f1d2585579e2941e690005 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -253,6 +253,7 @@ static struct cxl_driver cxl_mem_driver = { module_cxl_driver(cxl_mem_driver); +MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index e53646e9f2fb93fff9886f6188d5b67b0bc27ce0..4be35dc22202dfb70d6a50122eda1fb0c4ff630e 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1066,5 +1066,6 @@ static void __exit cxl_pci_driver_exit(void) module_init(cxl_pci_driver_init); module_exit(cxl_pci_driver_exit); +MODULE_DESCRIPTION("CXL: PCI manageability"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 2ecdaee63021550b6ad0502444c62cf4a36b7b67..4ef93da2233558c6868a3e0878d848b53de79474 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -453,6 +453,7 @@ static __exit void cxl_pmem_exit(void) cxl_driver_unregister(&cxl_nvdimm_bridge_driver); } +MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support"); MODULE_LICENSE("GPL v2"); module_init(cxl_pmem_init); module_exit(cxl_pmem_exit); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 97c21566677aa3b4dbdd84d7d656198d53af63fd..d7d5d982ce69f3c3c2a5eeb6b9d0fc39e910656e 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -209,6 +209,7 @@ static struct cxl_driver cxl_port_driver = { }; module_cxl_driver(cxl_port_driver); +MODULE_DESCRIPTION("CXL: Port enumeration and services"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_PORT); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index cbc92d3683e65c7d8600bfba10ba30de403f4307..e5c05a8769471e1afcda7afa10fb9ab9a29a8cfb 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -109,8 +109,8 @@ static const u32 knl_interleave_list[] = { 0x104, 0x10c, 0x114, 0x11c, /* 20-23 */ }; #define MAX_INTERLEAVE \ - (max_t(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ - max_t(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ + (MAX_T(unsigned int, ARRAY_SIZE(sbridge_interleave_list), \ + MAX_T(unsigned int, ARRAY_SIZE(ibridge_interleave_list), \ ARRAY_SIZE(knl_interleave_list)))) struct interleave_pkg { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 11faf1db4fa4820a1a2759b6a32d39281eba0b3d..473421ba7a18a56a330d00979186666caaf0a0c0 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -45,7 +45,6 @@ #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 1f32d6cf98d6913ff4e0169868126fc50a5b11fd..ed4e8ddbe76a50e2266cfb9679e76a8169bad447 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -27,8 +27,10 @@ cflags-$(CONFIG_ARM64) += -fpie $(DISABLE_STACKLEAK_PLUGIN) \ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ - $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax + $(call cc-option,-mno-single-pic-base) \ + $(DISABLE_STACKLEAK_PLUGIN) +cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax \ + $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt @@ -56,6 +58,10 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) # disable LTO KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) +# The .data section would be renamed to .data.efistub, therefore, remove +# `-fdata-sections` flag from KBUILD_CFLAGS_KERNEL +KBUILD_CFLAGS_KERNEL := $(filter-out -fdata-sections, $(KBUILD_CFLAGS_KERNEL)) + lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ file.o mem.o random.o randomalloc.o pci.o \ skip_spaces.o lib-cmdline.o lib-ctype.o \ diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 46ac5a8beab72a3e030077b2a521b856d1103fa0..e2e1e9df611543343a8498cdc425528a3e44619a 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -1444,5 +1444,6 @@ static void fsi_exit(void) } module_exit(fsi_exit); module_param(discard_errors, int, 0664); +MODULE_DESCRIPTION("FSI core driver"); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(discard_errors, "Don't invoke error handling on bus accesses"); diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index b0b624c3717bac1b87aee7ea55333cc09be57390..6f5e1bdf7e40e420dbb8523914c5e45d9433dcda 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -670,4 +670,5 @@ static struct platform_driver fsi_master_aspeed_driver = { }; module_platform_driver(fsi_master_aspeed_driver); +MODULE_DESCRIPTION("FSI master driver for AST2600"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-ast-cf.c b/drivers/fsi/fsi-master-ast-cf.c index f8c776ce1b5609380bfbc072355ac482c397ba15..a4c37ff8edd69d8c85d431baa97d3205f408f5bd 100644 --- a/drivers/fsi/fsi-master-ast-cf.c +++ b/drivers/fsi/fsi-master-ast-cf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // Copyright 2018 IBM Corp /* - * A FSI master controller, using a simple GPIO bit-banging interface + * A FSI master based on Aspeed ColdFire coprocessor */ #include @@ -1438,5 +1438,6 @@ static struct platform_driver fsi_master_acf = { }; module_platform_driver(fsi_master_acf); +MODULE_DESCRIPTION("A FSI master based on Aspeed ColdFire coprocessor"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME); diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c index 10fc344b6b221eaeda9be97b7a336efdcc244913..f761344f487329bfe9795cf2bab051ecbc52703a 100644 --- a/drivers/fsi/fsi-master-gpio.c +++ b/drivers/fsi/fsi-master-gpio.c @@ -892,4 +892,5 @@ static struct platform_driver fsi_master_gpio_driver = { }; module_platform_driver(fsi_master_gpio_driver); +MODULE_DESCRIPTION("A FSI master controller, using a simple GPIO bit-banging interface"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-hub.c b/drivers/fsi/fsi-master-hub.c index 6d8b6e8854e563e6c560ae6ec0a7a80c1c3d99bc..6568fed7db3cf531fe423a449aa7a5196537c473 100644 --- a/drivers/fsi/fsi-master-hub.c +++ b/drivers/fsi/fsi-master-hub.c @@ -295,4 +295,5 @@ static struct fsi_driver hub_master_driver = { }; module_fsi_driver(hub_master_driver); +MODULE_DESCRIPTION("FSI hub master driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index 61dbda9dbe2b0de3c844a768b22736aa557df8f3..411ddc018cd86a99a75b784f28ec04df42833a60 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -625,4 +625,5 @@ static void scom_exit(void) module_init(scom_init); module_exit(scom_exit); +MODULE_DESCRIPTION("SCOM FSI Client device driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 8dce78ea71395d56a92c179a35b50bd8c054617f..5762e517338eea0e4ee4bf2f2ed1261d0d2a7938 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -591,8 +591,6 @@ static int brcmstb_gpio_probe(struct platform_device *pdev) void __iomem *reg_base; struct brcmstb_gpio_priv *priv; struct resource *res; - struct property *prop; - const __be32 *p; u32 bank_width; int num_banks = 0; int num_gpios = 0; @@ -636,8 +634,7 @@ static int brcmstb_gpio_probe(struct platform_device *pdev) flags = BGPIOF_BIG_ENDIAN_BYTE_ORDER; #endif - of_property_for_each_u32(np, "brcm,gpio-bank-widths", prop, p, - bank_width) { + of_property_for_each_u32(np, "brcm,gpio-bank-widths", bank_width) { struct brcmstb_gpio_bank *bank; struct gpio_chip *gc; diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index d5906d419b0ab996a5286d8cc411929385bf2c4b..10ea71273c891521ac4e4f210d3441faa77e3309 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -39,6 +39,8 @@ #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 +#define MLXBF_GPIO_CLR_ALL_INTS GENMASK(31, 0) + struct mlxbf3_gpio_context { struct gpio_chip gc; @@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); val &= ~BIT(offset); writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + + writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); gpiochip_disable_irq(gc, offset); @@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) return 0; } +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) +{ + struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev); + + /* Disable and clear all interrupts */ + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); +} + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { { "MLNXBF33", 0 }, {} @@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = { .acpi_match_table = mlxbf3_gpio_acpi_match, }, .probe = mlxbf3_gpio_probe, + .shutdown = mlxbf3_gpio_shutdown, }; module_platform_driver(mlxbf3_gpio_driver); diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 0e0d55da4f0116c0cd94dd6c5c42e182542fc815..ccc47ea0b3e1227de099790ee01b893a4487c9ef 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -805,7 +805,7 @@ static int gpio_virtuser_dbgfs_init_line_attrs(struct device *dev, return -ENOMEM; data->ad.desc = desc; - sprintf(data->consumer, id); + strscpy(data->consumer, id); atomic_set(&data->irq, 0); atomic_set(&data->irq_count, 0); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index fd0749c0c630f654b482d4565f8219ee07114730..6b2c6b91f96250c7cacda4d6bfdf0df1f33c787a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -268,6 +268,7 @@ config DRM_EXEC config DRM_GPUVM tristate depends on DRM + select DRM_EXEC help GPU-VM representation providing helpers to manage a GPUs virtual address space diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9dd8294032ef923331fa0691854102a9e08ca3dc..38408e4e158e57353e4f24f982558b3853bc764c 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -106,7 +106,8 @@ amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ df_v4_3.o \ - df_v4_6_2.o + df_v4_6_2.o \ + df_v4_15.o # add GMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 916b6b8cf7d9a639021f1f2703be6434b60fa4f9..6dfdff58bffd1fb4819e11f6291a63a9b20af6d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); @@ -1778,7 +1781,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - int r; + int i, r; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1793,13 +1796,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; - if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; - } + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f7089fb81c4954d8cbb6c434c713be6..c43d1b6e5d66b283056d672f17cec756ee265ca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 1538b2dbfff1a24f070a4ec63c40deb298c73bee..eb605e79ae0e1f5a1e986c63ea9de18347ad457e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -33,6 +33,7 @@ struct amdgpu_df_hash_status { struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); void (*sw_fini)(struct amdgpu_device *adev); + void (*hw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b241f61fe9c92720e724c45421b6446459c96014..ac108fca64fe62a8d7a9429a253ef0b0055fdd25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -37,6 +37,7 @@ #include "df_v3_6.h" #include "df_v4_3.h" #include "df_v4_6_2.h" +#include "df_v4_15.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -2803,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 6, 2): adev->df.funcs = &df_v4_6_2_funcs; break; + case IP_VERSION(4, 15, 0): + case IP_VERSION(4, 15, 1): + adev->df.funcs = &df_v4_15_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 78089f2f79f5e55dba1fbcac86acd00224db376b..094498a0964b510a8e561c32b531fee277ab569d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -116,9 +116,10 @@ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ + * - 3.58.0 - Add GFX12 DCC support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 57 +#define KMS_DRIVER_MINOR 58 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6ca8d6b82abdcdb2368c675aa84918..c770cb201e64bc82a7337eb57437b744d02f4ea3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c0265902565670e98d28b8f015380840b06eb703..b49b3650fd62173b15c03d38889e63949b5b5bf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497ea23485f4ad48abf6843ceda8946..4d951a1baefab7bcd347e1b6a14c07454d513843 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65b4daa95fb06f9141c5a05bbe5b77..908e134551523366c4adc914cfa192232f5c0036 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e499d6ba306b739b9189abb79b9286ede5f5932b..1cb1ec7beefed3fb22ddae4c8b5edf30d8699667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return r; } - memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); return 0; @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; @@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -226,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); @@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && @@ -1573,7 +1592,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + mem, adev->mes.event_log_size, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e11051271f71db9a4d07aeb7f26ec03c174bfcc0..0bc837dab578f96798787a535ec75446496a6ff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ -#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -83,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -113,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; @@ -135,8 +134,9 @@ struct amdgpu_mes { unsigned long *doorbell_bitmap; /* MES event log buffer */ - struct amdgpu_bo *event_log_gpu_obj; - uint64_t event_log_gpu_addr; + uint32_t event_log_size; + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; void *event_log_cpu_addr; /* ip specific functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 800cc7a148b20054d58fbc1223bd62c489e20e0d..189574d53ebd30a7d37f3c2748624f9262707fb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1591,6 +1591,66 @@ static void psp_ras_ta_check_status(struct psp_context *psp) } } +static int psp_ras_send_cmd(struct psp_context *psp, + enum ras_command cmd_id, void *in, void *out) +{ + struct ta_ras_shared_memory *ras_cmd; + uint32_t cmd = cmd_id; + int ret = 0; + + if (!in) + return -EINVAL; + + mutex_lock(&psp->ras_context.mutex); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + switch (cmd) { + case TA_RAS_COMMAND__ENABLE_FEATURES: + case TA_RAS_COMMAND__DISABLE_FEATURES: + memcpy(&ras_cmd->ras_in_message, + in, sizeof(ras_cmd->ras_in_message)); + break; + case TA_RAS_COMMAND__TRIGGER_ERROR: + memcpy(&ras_cmd->ras_in_message.trigger_error, + in, sizeof(ras_cmd->ras_in_message.trigger_error)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + memcpy(&ras_cmd->ras_in_message.address, + in, sizeof(ras_cmd->ras_in_message.address)); + break; + default: + dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd); + ret = -EINVAL; + goto err_out; + } + + ras_cmd->cmd_id = cmd; + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + + switch (cmd) { + case TA_RAS_COMMAND__TRIGGER_ERROR: + if (!ret && out) + memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + ret = -EINVAL; + else if (out) + memcpy(out, + &ras_cmd->ras_out_message.address, + sizeof(ras_cmd->ras_out_message.address)); + break; + default: + break; + } + +err_out: + mutex_unlock(&psp->ras_context.mutex); + + return ret; +} + int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { struct ta_ras_shared_memory *ras_cmd; @@ -1632,23 +1692,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { - struct ta_ras_shared_memory *ras_cmd; + enum ras_command cmd_id; int ret; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - if (enable) - ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; - else - ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; - - ras_cmd->ras_in_message = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + cmd_id = enable ? + TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES; + ret = psp_ras_send_cmd(psp, cmd_id, info, NULL); if (ret) return -EINVAL; @@ -1672,6 +1724,8 @@ int psp_ras_terminate(struct psp_context *psp) psp->ras_context.context.initialized = false; + mutex_destroy(&psp->ras_context.mutex); + return ret; } @@ -1756,9 +1810,10 @@ int psp_ras_initialize(struct psp_context *psp) ret = psp_ta_load(psp, &psp->ras_context.context); - if (!ret && !ras_cmd->ras_status) + if (!ret && !ras_cmd->ras_status) { psp->ras_context.context.initialized = true; - else { + mutex_init(&psp->ras_context.mutex); + } else { if (ras_cmd->ras_status) dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); @@ -1772,12 +1827,12 @@ int psp_ras_initialize(struct psp_context *psp) int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask) { - struct ta_ras_shared_memory *ras_cmd; struct amdgpu_device *adev = psp->adev; int ret; uint32_t dev_mask; + uint32_t ras_status = 0; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; switch (info->block_id) { @@ -1801,13 +1856,8 @@ int psp_ras_trigger_error(struct psp_context *psp, dev_mask &= AMDGPU_RAS_INST_MASK; info->sub_block_index |= dev_mask; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; - ras_cmd->ras_in_message.trigger_error = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status); if (ret) return -EINVAL; @@ -1817,9 +1867,9 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) + if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) return -EACCES; - else if (ras_cmd->ras_status) + else if (ras_status) return -EINVAL; return 0; @@ -1829,25 +1879,16 @@ int psp_ras_query_address(struct psp_context *psp, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out) { - struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras_context.context.initialized) - return -EINVAL; - - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; - ras_cmd->ras_in_message.address = *addr_in; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); - if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + if (!psp->ras_context.context.initialized || + !addr_in || !addr_out) return -EINVAL; - *addr_out = ras_cmd->ras_out_message.address; + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out); - return 0; + return ret; } // ras end diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3635303e65484c26319a20acb74acba4856be336..74a96516c913879e145b4eed95235c1a9370d5e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -200,6 +200,7 @@ struct psp_xgmi_context { struct psp_ras_context { struct ta_context context; struct amdgpu_ras *ras; + struct mutex mutex; }; #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 8e8afbd237bcdffb74a907d30fc74e3dfd850ff4..0c856005df6b95642af7b7ae4bdbf6a33a86f7ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -348,6 +348,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size context->session_id = ta_id; + mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) goto err_free_shared_buf; @@ -366,6 +367,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret = -EFAULT; err_free_shared_buf: + mutex_unlock(&psp->ras_context.mutex); kfree(shared_buf); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index eae0a555df3c482f4535ff6f45ed9f7d88845801..aab8077e50988ef08c3422de907b8bd9f48a3784 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1011,6 +1011,9 @@ Out: uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) { + /* get available eeprom table version first before eeprom table init */ + amdgpu_ras_set_eeprom_table_version(control); + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8b2525bb7a8922a5db76213285604..e6344a6b0a9f6af7e0df6fd0d7d012f693db5104 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 8d65b096db90097a618ead38fd6f8c5be5dae81b..43f44cc201cb808dc9313a27c02db1735e2063a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -147,6 +147,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } + /* from vcn4 and above, only unified queue is used */ + adev->vcn.using_unified_queue = + amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); @@ -275,18 +279,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } -/* from vcn4 and above, only unified queue is used */ -static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - bool ret = false; - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) - ret = true; - - return ret; -} - bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -397,7 +389,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) for (i = 0; i < adev->vcn.num_enc_rings; ++i) fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (fence[j] || @@ -443,7 +437,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { @@ -469,8 +465,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + !adev->vcn.using_unified_queue) atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); atomic_dec(&ring->adev->vcn.total_submission_cnt); @@ -724,12 +724,11 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); - bool sq = amdgpu_vcn_using_unified_queue(ring); uint32_t *ib_checksum; uint32_t ib_pack_in_dw; int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -742,7 +741,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw = 0; /* single queue headers */ - if (sq) { + if (adev->vcn.using_unified_queue) { ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + 4 + 2; /* engine info + decoding ib in dw */ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); @@ -761,7 +760,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); r = amdgpu_job_submit_direct(job, ring, &f); @@ -851,15 +850,15 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -873,7 +872,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -895,7 +894,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -918,15 +917,15 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -940,7 +939,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -962,7 +961,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 9f06def236fdc0b2c5cdc5615f1f9cd1262de8a7..c87d68d4be536539c8bcca899cbc03058bd19c7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -329,6 +329,7 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool using_unified_queue; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -460,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b597d65f32bc3f3a0cc62973a8dac..b287a82e6177e9b3614595ada9eba8f721fc1363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a272cfd701d5043e1f0409124ecf35..a060c28f0877cd3b27547e58cd04354babead9ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += lower_32_bits(vm->generation); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; - if (drm_sched_entity_error(&vm->delayed)) { - ++vm->generation; + if (vm->generation != new_vm_generation) { + vm->generation = new_vm_generation; amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_fini_entities(vm); r = amdgpu_vm_init_entities(adev, vm); @@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = amdgpu_vm_generation(adev, NULL); mutex_init(&vm->eviction_lock); vm->evicting = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c8802e6cac7ba9dd348c1e7349491..7d26a962f811cf0bb2b2fe2ff28a5a56f0dccd81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,19 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +534,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +569,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c new file mode 100644 index 0000000000000000000000000000000000000000..2a573e33908b88a07489b5c229fd645780320dc4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_15.h" + +#include "df/df_4_15_offset.h" +#include "df/df_4_15_sh_mask.h" + +static void df_v4_15_hw_init(struct amdgpu_device *adev) +{ + if (adev->have_atomics_support) { + uint32_t tmp; + uint32_t dis_lcl_proc = (1 << 1 | + 1 << 2 | + 1 << 13); + + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); + WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp); + } +} + +const struct amdgpu_df_funcs df_v4_15_funcs = { + .hw_init = df_v4_15_hw_init +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h new file mode 100644 index 0000000000000000000000000000000000000000..dddf2422112a7f6ccb8b8f776ec94b625e765b2f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h @@ -0,0 +1,30 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V4_15_H__ +#define __DF_V4_15_H__ + +extern const struct amdgpu_df_funcs df_v4_15_funcs; + +#endif /* __DF_V4_15_H__ */ + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800c2ba90f9a3dcd5adec27ffd28124..2c611b8577a7ec17cd9f908ce31456e290481787 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3432,6 +3438,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3472,6 +3496,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) @@ -3519,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3553,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3628904ea1f8778bb9b0b18589b76..2797fd84432b22d6ded588a047f40219187a282a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e6d85e87aa4d1c121b358819d24b5..edcb5351f8cca7ce8f302cacd04189c12bf1cf1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657e9b7362708e56c27fc5d57d5a07d..98aa3ccd0d20244cc95a61cc0b854d7d12d46f79 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 04d8966423decf1879fd46e950a4724a09cfc6be..6ae5a784e18746e207ba51c5da7cb402ded49b1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -32,6 +33,9 @@ #include "vcn/vcn_4_0_3_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#define NORMALIZE_JPEG_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, @@ -621,6 +625,13 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } +static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through JPEG ring. + */ +} + /** * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer * @@ -772,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -817,7 +832,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -858,7 +879,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1062,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1072,6 +1100,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_jpeg_dec_ring_test_ring, .test_ib = amdgpu_jpeg_dec_ring_test_ib, .insert_nop = jpeg_v4_0_3_dec_ring_nop, @@ -1225,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f68564c381ecca77c13bfa08a5eb31f7c..71c54b294e157e060e54307bcfbda336d93a484b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a1ea4e82c52d53ede1b5cc036a102..f4daff90c7709daa16b5b5ece2adcf56892e0a3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8ce51b9236c1ae50331768745223786077b8b1d2..2ea8223eb969aeb8a58affc8711875cfa48ad53d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1015,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1029,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1071,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1124,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1163,6 +1173,8 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; @@ -1198,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1214,12 +1223,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1330,9 +1339,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1350,7 +1359,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1395,7 +1404,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index c9f74231ad595158bf94b8761b95d9e148ed4535..e39a58d262c94e2392bd25922cd0a8866e9d51a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -551,10 +591,12 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -732,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -755,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -772,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -788,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1083,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1099,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1118,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1144,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1176,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1198,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1237,14 +1267,13 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1252,18 +1281,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1272,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1282,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } + } - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1337,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1358,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1388,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1401,9 +1435,15 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1416,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1439,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; @@ -1468,7 +1508,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; @@ -1511,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caaba4d69c45dc4f3bf8da42c59e075f..0fbc3be81f140fcc81859767fc3ffe24646e144a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index cc9e961f007877d9d9c29d89f231b3959898acbc..af1e90159ce36c386f8cf4a77fcdc93c0c41c59b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,6 +176,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -1647,6 +1655,10 @@ static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) * but it shouldn't hurt for other parts since * this GFXOFF will be disallowed anyway when SDMA is * active, this just makes it explicit. + * sdma_v5_2_ring_set_wptr() takes advantage of this + * to update the wptr because sometimes SDMA seems to miss + * doorbells when entering PG. If you remove this, update + * sdma_v5_2_ring_set_wptr() as well! */ amdgpu_gfx_off_ctrl(adev, false); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dcde43add1f99064e013e0ee153907..ecee9e7d7e4c6bd30beae52723bd1f881d287309 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1575,8 +1575,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1590,6 +1589,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** @@ -1616,7 +1617,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index 04c797d54511b8306e9db6f72cb67800dacf6b6a..0af648931df580c8c0ad17adb06a7f684d5dc6b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f05a78078ada111deedc00cd8cf17..e74e1983da53aa41a5950d871e76fde495d60463 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index d27fb4ea66121b8e74a5a3142d3d87ffb576051e..b0c3678cfb31d80cf6f590037a1043a758e644d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | @@ -484,6 +486,10 @@ static int soc24_common_hw_init(void *handle) */ if (adev->nbio.funcs->remap_hdp_registers) adev->nbio.funcs->remap_hdp_registers(adev); + + if (adev->df.funcs->hw_init) + adev->df.funcs->hw_init(adev); + /* enable the doorbell aperture */ soc24_enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index f6d96a44d75ff3b62ed77917674ebcc9ca2c7819..776c539bfddacba8cb4c971de21378e44900c730 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1045,6 +1045,9 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1498,6 +1501,9 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f53054e39ebb473462d610ac419e3cdf1b300394..9bae95538b6287ccb344380ccabd1735710a350f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define NORMALIZE_VCN_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); @@ -1375,6 +1378,50 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, + lower_32_bits(pd_addr), 0xffffffff); +} + +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* VCN engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through VCN ring. + */ +} + /** * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer * @@ -1414,7 +1461,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -1422,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index f45495de6875d58046837a6de82624c3674f33b1..8d75061f9f3847b919cd28a0e159612ed94fa0d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -958,6 +958,9 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1162,6 +1165,9 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 070b56610c7de24ed234b85e0d69d300978c889e..68c97fcd539b9028be59b96b1feffdc6b6dd9d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -721,6 +721,9 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -898,6 +901,9 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae44705c3b6e14b14e09ecfd48c268d..983a977632ff4e263b588dc5d140315f3e8e65fe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 5fd1b6b44577b82ce571842820363ae130762675..2d7755e2b6c320460405bb10b08bffc9e3c6907b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -137,6 +137,13 @@ struct vblank_control_work { bool enable; }; +/** + * struct idle_workqueue - Work data for periodic action in idle + * @work: Kernel work data for the work event + * @dm: amdgpu display manager device + * @enable: true if idle worker is enabled + * @running: true if idle worker is running + */ struct idle_workqueue { struct work_struct work; struct amdgpu_display_manager *dm; @@ -502,6 +509,12 @@ struct amdgpu_display_manager { * Deferred work for vblank control events. */ struct workqueue_struct *vblank_control_workqueue; + + /** + * @idle_workqueue: + * + * Periodic work for idle events. + */ struct idle_workqueue *idle_workqueue; struct drm_atomic_state *cached_state; @@ -587,7 +600,9 @@ struct amdgpu_display_manager { */ struct mutex dpia_aux_lock; - /* + /** + * @bb_from_dmub: + * * Bounding box data read from dmub during early initialization for DCN4+ */ struct dml2_soc_bb *bb_from_dmub; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508f673e162b5f7ca41c7ec9f04b472..2e9f6da1acdcad0e3236105afe0e5d78f64084c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1270,6 +1285,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b737367a57eeb608f86a0e1d1ecad0b09..600d6e22101112b83beb50b554246877c8e99d66 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index be2638c763d7897031d651433d8c66890ffe2454..9a406d74c0dd7617948782ab12ef8de3e003ab7a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -35,8 +35,10 @@ #include "dc_stream_priv.h" #define DC_LOGGER dc->ctx->logger +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(x, y) ((x > y) ? x : y) +#endif /******************************************************************************* * Private functions diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 067f6555cfdff6a5effbda552d1bd56d9670f2ab..ccbb15f1638c8b73e45602333d8e3d42b6548f26 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -143,7 +143,8 @@ const struct dc_plane_status *dc_plane_get_status( if (pipe_ctx->plane_state != plane_state) continue; - pipe_ctx->plane_state->status.is_flip_pending = false; + if (pipe_ctx->plane_state) + pipe_ctx->plane_state->status.is_flip_pending = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f452b6bc8584a614844524df06f48..4d960dc5ce8935f1d12c9a8fc4f3e4045adadf42 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -185,8 +185,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1d86bc73f630679e55344e1add943..46f9c05de16e8c9035f9e26c0b5c481c274d52ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index defe13436a2c36d7c831eafc7f1dad553964a782..e73579f1a88e502d400f2537a3904d3dadbee852 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -64,8 +64,6 @@ double math_ceil(const double arg) double math_ceil2(const double arg, const double significance) { - ASSERT(significance != 0); - return ((int)(arg / significance + 0.99999)) * significance; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267beaef2cabfa6972c12677be00617f..14a902ff3b8ac9fac75f1dae4b2cb47e053635cd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1402,6 +1402,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -3587,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { @@ -3680,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c10269ff9c6ed452dc1f3fdcd969ca775..f115c7a285e7766c36cf2efb56d242fa0b0d8013 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 9ac7fc717a92fd39c53f7af3b23d42d95b225ac6..0150f2581ee4c5b4e986a87a7abd792f2a497b4b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -147,16 +147,28 @@ struct cnv_color_keyer_params { int color_keyer_blue_high; }; -/* new for dcn2: set the 8bit alpha values based on the 2 bit alpha - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0 default: 0b00000000 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1 default: 0b01010101 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2 default: 0b10101010 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3 default: 0b11111111 +/** + * struct cnv_alpha_2bit_lut - Set the 8bit alpha values based on the 2 bit alpha */ struct cnv_alpha_2bit_lut { + /** + * @lut0: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0. Default: 0b00000000 + */ int lut0; + + /** + * @lut1: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1. Default: 0b01010101 + */ int lut1; + + /** + * @lut2: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2. Default: 0b10101010 + */ int lut2; + + /** + * @lut3: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3. Default: 0b11111111 + */ int lut3; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 40a9b3471208ef3e78449194666fdff2e78f55d7..3a89cc0cffc11f56fe0adec561a097394be3b1ac 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -1039,6 +1039,20 @@ struct mpc_funcs { */ void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_ID id, const enum MCM_LUT_XABLE xable, bool lut_bank_a, int mpcc_id); + /** + * @program_3dlut_size: + * + * Program 3D LUT size. + * + * Parameters: + * - [in/out] mpc - MPC context. + * - [in] is_17x17x17 - is 3dlut 17x17x17 + * - [in] mpcc_id + * + * Return: + * + * void + */ void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h index 127fb1a5165495637042051fdbdef11691880ad0..747679cb49449569d9f32b3593e63137b48f2bde 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h @@ -205,9 +205,24 @@ struct gamma_coefficients { struct fixed31_32 user_brightness; }; +/** + * struct pwl_float_data - Fixed point RGB color + */ struct pwl_float_data { + /** + * @r: Component Red. + */ struct fixed31_32 r; + + /** + * @g: Component Green. + */ + struct fixed31_32 g; + + /** + * @b: Component Blue. + */ struct fixed31_32 b; }; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index 50459d7a0f85f0a2829b18ec6eb36487dceded8b..b76737b7b9e41bc36ce5bb6591cd5b936c1d2d37 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -26,6 +26,16 @@ #include "core_types.h" #include "link_enc_cfg.h" +/** + * DOC: overview + * + * Display Input Output (DIO), is the display input and output unit in DCN. It + * includes output encoders to support different display output, like + * DisplayPort, HDMI, DVI interface, and others. It also includes the control + * and status channels for these interfaces. + */ + + void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size) { @@ -254,12 +264,31 @@ static const struct link_hwss dio_link_hwss = { }, }; +/** + * can_use_dio_link_hwss - Check if the link_hwss is accessible + * + * @link: Reference a link struct containing one or more sinks and the + * connective status. + * @link_res: Mappable hardware resource used to enable a link. + * + * Returns: + * Return true if the link encoder is accessible from link. + */ bool can_use_dio_link_hwss(const struct dc_link *link, const struct link_resource *link_res) { return link->link_enc != NULL; } +/** + * get_dio_link_hwss - Return link_hwss reference + * + * This function behaves like a get function to return the link_hwss populated + * in the link_hwss_dio.c file. + * + * Returns: + * Return the reference to the filled struct of link_hwss. + */ const struct link_hwss *get_dio_link_hwss(void) { return &dio_link_hwss; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h index a1f72fe378ee4824dd940f2c88565eda1330237f..45f0e091fcb01beb61cfa11feaae1a79572bf5bb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h @@ -23,15 +23,6 @@ * */ -/** - * DOC: overview - * - * Display Input Output (DIO), is the display input and output unit in DCN. It - * includes output encoders to support different display output, like - * DisplayPort, HDMI, DVI interface, and others. It also includes the control - * and status channels for these interfaces. - */ - #ifndef __LINK_HWSS_DIO_H__ #define __LINK_HWSS_DIO_H__ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 336488c0574e51b33c20bafced37c793a9dd94bc..94427875bcdd70bb328f407089f38a6a4d2629a2 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -945,19 +945,10 @@ void optc1_set_drr( OTG_FORCE_LOCK_ON_EVENT, 0, OTG_SET_V_TOTAL_MIN_MASK_EN, 0, OTG_SET_V_TOTAL_MIN_MASK, 0); - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); - - } else { - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_SET_V_TOTAL_MIN_MASK, 0, - OTG_V_TOTAL_MIN_SEL, 0, - OTG_V_TOTAL_MAX_SEL, 0, - OTG_FORCE_LOCK_ON_EVENT, 0); - - optc->funcs->set_vtotal_min_max(optc, 0, 0); } + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); } void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c index 43417cff2c9bcf0290576abcf289c810b83aad26..b4694985a40a42d08d4140a694df77f245278105 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c @@ -453,6 +453,16 @@ void optc2_setup_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + /* Set the min/max selectors unconditionally so that + * DMCUB fw may change OTG timings when necessary + * TODO: Remove the w/a after fixing the issue in DMCUB firmware + */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + REG_SET_8(OTG_TRIGA_CNTL, 0, OTG_TRIGA_SOURCE_SELECT, 21, OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36ed19b856ff4744ce4cade2b3ecbf..8e0588b1cf3054a60e6164a51cab3f1599e14bc7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; /* read VBIOS LTTPR caps */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e67dc3fea45a6d8e7e785cb379d0f..34b02147881ddb721751ef37eaadbd1117803d96 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41fd7649fa7ad0311ec3ef9c5b6132..bb46f30d11d0a1ef5f38d8fba3c9d30cb46dfbc8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c index 7ecf76aea950514e12b4afd8599b147245d31285..6e064e6ae949f3979e06c33ff8597c27a2550422 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c @@ -25,7 +25,9 @@ #include "hdcp.h" +#ifndef MIN #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif #define HDCP_I2C_ADDR 0x3a /* 0x74 >> 1*/ #define KSV_READ_SIZE 0xf /* 0x6803b - 0x6802c */ #define HDCP_MAX_AUX_TRANSACTION_SIZE 16 diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h new file mode 100644 index 0000000000000000000000000000000000000000..c2b009752f602aae3291bdd4be572e546c76a659 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_OFFSET_HEADER +#define _df_4_15_OFFSET_HEADER + +#define regNCSConfigurationRegister1 0x0901 +#define regNCSConfigurationRegister1_BASE_IDX 4 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h new file mode 100644 index 0000000000000000000000000000000000000000..9868a9c327954a32ee8306e370954e01ba7677a5 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _df_4_15_SH_MASK_HEADER +#define _df_4_15_SH_MASK_HEADER + +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT 0x3 +#define NCSConfigurationRegister1__DisIntAtomicsLclProcessing_MASK 0x0003FFF8L + +#endif diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b72d5d3622512338797ed3e81dd3de7c196dcfed..21ceafce1f9b27c96486e14787505c64a4484f13 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 1 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 + /* Driver submits one API(cmd) as a single Frame and this command size is same * for all API to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index ffd67c6ed9b3702de090d0b9160ee8b70b665484..101e2fe962c6a6bc1b34023ffe6bb9d5d2308e1c 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 0x14 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000 + /* Driver submits one API(cmd) as a single Frame and this command size is same for all API * to ease the debugging and parsing of ring buffer. */ @@ -94,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -239,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index a1b8a82d77cf9a8f138f2ec98ff9efd9ab26332b..8b7d6ed7e2ed235af9dcff50f14bb2f3996803dc 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; int r = 0; - if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU) + if (!pp_funcs || !pp_funcs->load_firmware || + (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU))) return 0; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h index 6f54c410c2f9859392e78e15b1334d13b8713218..409aeec6baa92193db436878b1d0552b00c126b6 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h @@ -22,12 +22,18 @@ */ #include -#define SHIFT_AMOUNT 16 /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */ +enum ppevvmath_constants { + /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */ + SHIFT_AMOUNT = 16, -#define PRECISION 5 /* Change this value to change the number of decimal places in the final output - 5 is a good default */ + /* Change this value to change the number of decimal places in the final output - 5 is a good default */ + PRECISION = 5, -#define SHIFTED_2 (2 << SHIFT_AMOUNT) -#define MAX (1 << (SHIFT_AMOUNT - 1)) - 1 /* 32767 - Might change in the future */ + SHIFTED_2 = (2 << SHIFT_AMOUNT), + + /* 32767 - Might change in the future */ + MAX = (1 << (SHIFT_AMOUNT - 1)) - 1, +}; /* ------------------------------------------------------------------------------- * NEW TYPE - fINT diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb8643d25d1bc60a735ee3536af61d5c0a3df63a..9d7454b3c3143dead4d3a207401aa8a6c1777e5b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1924,20 +1924,12 @@ static int smu_disable_dpms(struct smu_context *smu) } /* - * For SMU 13.0.4/11 and 14.0.0, PMFW will handle the features disablement properly + * For GFX11 and subsequent APUs, PMFW will handle the features disablement properly * for gpu reset and S0i3 cases. Driver involvement is unnecessary. */ - if (amdgpu_in_reset(adev) || adev->in_s0ix) { - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 4): - case IP_VERSION(13, 0, 11): - case IP_VERSION(14, 0, 0): - case IP_VERSION(14, 0, 1): - return 0; - default: - break; - } - } + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) >= 11 && + smu->is_apu && (amdgpu_in_reset(adev) || adev->in_s0ix)) + return 0; /* * For gpu reset, runpm and hibernation through BACO, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed73185a17e427d4d859c63158946ef..75c921e87360eb397a0dc874660b4207873c90df 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 5d47d58944f66194ba966bc74c1ff61aa18df432..8798ebfcea8328a424c873e7c91dca7308cbf1bf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -69,6 +69,9 @@ #define SMU_14_0_0_UMD_PSTATE_SOCCLK 678 #define SMU_14_0_0_UMD_PSTATE_FCLK 1800 +#define SMU_14_0_4_UMD_PSTATE_GFXCLK 938 +#define SMU_14_0_4_UMD_PSTATE_SOCCLK 938 + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ @@ -1296,19 +1299,28 @@ static int smu_v14_0_common_get_dpm_profile_freq(struct smu_context *smu, switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: - clk_limit = SMU_14_0_0_UMD_PSTATE_GFXCLK; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) + clk_limit = SMU_14_0_4_UMD_PSTATE_GFXCLK; + else + clk_limit = SMU_14_0_0_UMD_PSTATE_GFXCLK; if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &clk_limit); else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &clk_limit, NULL); break; case SMU_SOCCLK: - clk_limit = SMU_14_0_0_UMD_PSTATE_SOCCLK; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) + clk_limit = SMU_14_0_4_UMD_PSTATE_SOCCLK; + else + clk_limit = SMU_14_0_0_UMD_PSTATE_SOCCLK; if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &clk_limit); break; case SMU_FCLK: - clk_limit = SMU_14_0_0_UMD_PSTATE_FCLK; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &clk_limit); + else + clk_limit = SMU_14_0_0_UMD_PSTATE_FCLK; if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &clk_limit); else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98ea58d792ca18c0efdbb4bad33367edc2454202..e1a27903c80a149a115cc3ae916cacb2593e3038 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -66,6 +66,7 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 #define DEBUGSMC_MSG_Mode1Reset 2 +#define LINK_SPEED_MAX 3 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -221,7 +222,6 @@ static struct cmn2asic_mapping smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT), }; -#if 0 static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT), [THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT), @@ -241,7 +241,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT), [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT), }; -#endif static int smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu, @@ -1869,6 +1868,88 @@ static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, return ret; } +static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v1_3 *gpu_metrics = + (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; + SmuMetricsExternal_t metrics_ext; + SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, + &metrics_ext, + true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); + + gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE]; + gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT]; + gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM]; + gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX]; + gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC]; + gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0], + metrics->AvgTemperature[TEMP_VR_MEM1]); + + gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + + gpu_metrics->average_socket_power = metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + else + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; + + if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; + else + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; + + gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0]; + + gpu_metrics->throttle_status = + smu_v14_0_2_get_throttler_status(metrics); + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, + smu_v14_0_2_throttler_map); + + gpu_metrics->current_fan_speed = metrics->AvgFanRpm; + + gpu_metrics->pcie_link_width = metrics->PcieWidth; + if ((metrics->PcieRate - 1) > LINK_SPEED_MAX) + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1); + else + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX]; + gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC]; + gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM]; + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v1_3); +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1905,6 +1986,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_thermal_alert = smu_v14_0_enable_thermal_alert, .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, + .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 88eefef05faedce4af8b7b0e2d27a0ace00038ec..91ad434bcdaeb421734fb486f6a0fbf7dfa65c82 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -794,7 +794,7 @@ static const char *smu_get_feature_name(struct smu_context *smu, size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu, char *buf) { - int8_t sort_feature[max(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)]; + int8_t sort_feature[MAX(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)]; uint64_t feature_mask; int i, feature_index; uint32_t count = 0; diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 1e9259416980ec49cce1b7fc080f562f002e29c5..e6c7f0d64e99592534d1736013647972b83e64b7 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -158,7 +158,14 @@ void ast_dp_launch(struct drm_device *dev) ASTDP_HOST_EDID_READ_DONE); } +bool ast_dp_power_is_on(struct ast_device *ast) +{ + u8 vgacre3; + + vgacre3 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xe3); + return !(vgacre3 & AST_DP_PHY_SLEEP); +} void ast_dp_power_on_off(struct drm_device *dev, bool on) { diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index aae019e79bda88197bc08bc998bce20ee116cac2..225817087b4d2f8459a03ef7f5416f870fc2b65f 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -391,6 +391,11 @@ static int ast_drm_freeze(struct drm_device *dev) static int ast_drm_thaw(struct drm_device *dev) { + struct ast_device *ast = to_ast_device(dev); + + ast_enable_vga(ast->ioregs); + ast_open_key(ast->ioregs); + ast_enable_mmio(dev->dev, ast->ioregs); ast_post_gpu(dev); return drm_mode_config_helper_resume(dev); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index ba3d86973995fd0444f1b9934e4cc26fd5e769fb..47bab5596c16ed24714bb000e4305d0564556745 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -472,6 +472,7 @@ void ast_init_3rdtx(struct drm_device *dev); bool ast_astdp_is_connected(struct ast_device *ast); int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata); void ast_dp_launch(struct drm_device *dev); +bool ast_dp_power_is_on(struct ast_device *ast); void ast_dp_power_on_off(struct drm_device *dev, bool no); void ast_dp_set_on_off(struct drm_device *dev, bool no); void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index dc8f639e82fdf5dadfec93f4ad6faa53728d00be..049ee1477c337539caa1c54046be1bb62a8a455e 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -28,6 +28,7 @@ * Authors: Dave Airlie */ +#include #include #include @@ -1687,11 +1688,35 @@ static int ast_astdp_connector_helper_detect_ctx(struct drm_connector *connector struct drm_modeset_acquire_ctx *ctx, bool force) { + struct drm_device *dev = connector->dev; struct ast_device *ast = to_ast_device(connector->dev); + enum drm_connector_status status = connector_status_disconnected; + struct drm_connector_state *connector_state = connector->state; + bool is_active = false; + + mutex_lock(&ast->modeset_lock); + + if (connector_state && connector_state->crtc) { + struct drm_crtc_state *crtc_state = connector_state->crtc->state; + + if (crtc_state && crtc_state->active) + is_active = true; + } + + if (!is_active && !ast_dp_power_is_on(ast)) { + ast_dp_power_on_off(dev, true); + msleep(50); + } if (ast_astdp_is_connected(ast)) - return connector_status_connected; - return connector_status_disconnected; + status = connector_status_connected; + + if (!is_active && status == connector_status_disconnected) + ast_dp_power_on_off(dev, false); + + mutex_unlock(&ast->modeset_lock); + + return status; } static const struct drm_connector_helper_funcs ast_astdp_connector_helper_funcs = { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 22bbb2d83e3073518505d91a6c5c6898150aa0e6..7936c20239551817bc8236ef6c1f65606084a39d 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1070,21 +1070,17 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } - if (async_flip && prop != config->prop_fb_id) { + if (async_flip && + (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || + (prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } - if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { - drm_dbg_atomic(prop->dev, - "[OBJECT:%d] Only primary planes can be changed during async flip\n", - obj->id); - ret = -EINVAL; - break; - } - ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 0869b663f17e4d3179efe59b5fe200b033f8f7ac..a4fbf1eb7ac5a845588600d96ccc4ad645fa5a14 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -443,10 +443,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, panel_bridge = bridge; } - if (connector_type == DRM_MODE_CONNECTOR_Unknown) { - kfree(bridge_connector); + if (connector_type == DRM_MODE_CONNECTOR_Unknown) return ERR_PTR(-EINVAL); - } if (bridge_connector->bridge_hdmi) ret = drmm_connector_hdmi_init(drm, connector, @@ -461,10 +459,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, connector_type, ddc); - if (ret) { - kfree(bridge_connector); + if (ret) return ERR_PTR(ret); - } drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ecc19a232d489af91fc2464fff9975..103c185bb1c8a76ac0fc7a45ce2a5f34c683298a 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 2803ac111bbd8ad5a069b7700832365175de76da..bfedcbf516dbe2b1efd037ed22efe4cfe139da48 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -355,7 +355,7 @@ int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer, err_drm_gem_vmap_unlocked: drm_gem_unlock(gem); - return 0; + return ret; } EXPORT_SYMBOL(drm_client_buffer_vmap_local); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 31af5cf37a099810f282e7c18ef3da940132105e..cee5eafbfb81a8bbc12cbdce0d6f29022f513e89 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index d021497841b84665b08db26088ec67eab9c5f45e..3969dc548cff605cbdd3d56dceafb2ca00a5c886 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -532,7 +532,7 @@ int drm_plane_create_color_properties(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct drm_property *prop; - struct drm_prop_enum_list enum_list[max_t(int, DRM_COLOR_ENCODING_MAX, + struct drm_prop_enum_list enum_list[MAX_T(int, DRM_COLOR_ENCODING_MAX, DRM_COLOR_RANGE_MAX)]; int i, len; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 18565ec6845173ca7216ff5e37ea5063846274b3..56ac37ea2f27484348fe501eee767b6b68034866 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -624,6 +624,17 @@ static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { + /* + * This function may be invoked by panic() to flush the frame + * buffer, where all CPUs except the panic CPU are stopped. + * During the following schedule_work(), the panic CPU needs + * the worker_pool lock, which might be held by a stopped CPU, + * causing schedule_work() and panic() to block. Return early on + * oops_in_progress to prevent this blocking. + */ + if (oops_in_progress) + return; + drm_fb_helper_add_damage_clip(helper, x, y, width, height); schedule_work(&helper->damage_work); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3f84d7527793ef99db4d4e3d595c4564925f8eaa..0830cae9a4d0f5e6692258a8bec1dca033f323d7 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), @@ -414,6 +426,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), }, .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* OrangePi Neo */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "OrangePi"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "NEO-01"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d14e2952bf2d0af3761b82743ebe30..6c3333136737e7b2db71404e18242276399ae663 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3903f6ead6e6602daba0e26917ebbd6d8005afbf..59f11af3b0a1dca622cf9e8ecb5be3c715533c22 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5314,6 +5314,8 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); + intel_dp->link_trained = false; + intel_dp_check_frl_training(intel_dp); intel_dp_pcon_dsc_configure(intel_dp, crtc_state); intel_dp_start_link_train(NULL, intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 1bc4ef84ff3bcda93940f172843e9a83be8d2625..d044c8e36bb3d60551b76ab5220d8252ffe83849 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -117,10 +117,24 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; } -static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) +{ + return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] == + DP_PHY_REPEATER_MODE_TRANSPARENT; +} + +/* + * Read the LTTPR common capabilities and switch the LTTPR PHYs to + * non-transparent mode if this is supported. Preserve the + * transparent/non-transparent mode on an active link. + * + * Return the number of detected LTTPRs in non-transparent mode or 0 if the + * LTTPRs are in transparent mode or the detection failed. + */ +static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { int lttpr_count; - int i; if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd)) return 0; @@ -134,6 +148,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI if (lttpr_count == 0) return 0; + /* + * Don't change the mode on an active link, to prevent a loss of link + * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR + * resetting its internal state when the mode is changed from + * non-transparent to transparent. + */ + if (intel_dp->link_trained) { + if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp)) + goto out_reset_lttpr_count; + + return lttpr_count; + } + /* * See DP Standard v2.0 3.6.6.1. about the explicit disabling of * non-transparent mode and the disable->enable non-transparent mode @@ -154,11 +181,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI "Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n"); intel_dp_set_lttpr_transparent_mode(intel_dp, true); - intel_dp_reset_lttpr_count(intel_dp); - return 0; + goto out_reset_lttpr_count; } + return lttpr_count; + +out_reset_lttpr_count: + intel_dp_reset_lttpr_count(intel_dp); + + return 0; +} + +static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + int lttpr_count; + int i; + + lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd); + for (i = 0; i < lttpr_count; i++) intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i)); @@ -1482,10 +1523,10 @@ void intel_dp_start_link_train(struct intel_atomic_state *state, struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; bool passed; - /* - * TODO: Reiniting LTTPRs here won't be needed once proper connector - * HW state readout is added. + * Reinit the LTTPRs here to ensure that they are switched to + * non-transparent mode. During an earlier LTTPR detection this + * could've been prevented by an active link. */ int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 90998b037349d398e338af5a1b01c2ec80d91186..292d163036b12a3817f77f40906854b729a42c92 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params, } static int -skl_ddi_calculate_wrpll(int clock /* in Hz */, +skl_ddi_calculate_wrpll(int clock, int ref_clock, struct skl_wrpll_params *wrpll_params) { @@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, }; unsigned int dco, d, i; unsigned int p0, p1, p2; - u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */ for (d = 0; d < ARRAY_SIZE(dividers); d++) { for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { @@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) struct skl_wrpll_params wrpll_params = {}; int ret; - ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000, + ret = skl_ddi_calculate_wrpll(crtc_state->port_clock, i915->display.dpll.ref_clks.nssc, &wrpll_params); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index a568a457e53268011a6c932efc7f3dc1b7cb9fb0..f590d7f48ba74fdbc822de1e649180ced766c6de 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -251,7 +251,7 @@ #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ - PIPE_HDCP2_STREAM_STATUS(pipe)) + PIPE_HDCP2_STREAM_STATUS(port)) #define _PORTA_HDCP2_AUTH_STREAM 0x66F00 #define _PORTB_HDCP2_AUTH_STREAM 0x66F04 diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba864aa78dfae2a19a55f9e9c4f72834..7ce926241e83a234ed36d62cb8ef6c4710a214f7 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625fe3593667315d158eb8abfca43a8..cac6d4184506cc7c289065a37e47e43bf344dae3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -290,6 +290,41 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + unsigned long *start_vaddr, + unsigned long *end_vaddr) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +441,14 @@ retry: if (ret) goto err_unpin; + set_address_limits(area, vma, obj_offset, &start, &end); + + pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + pfn += (start - area->vm_start) >> PAGE_SHIFT; + pfn += obj_offset - vma->gtt_view.partial.offset; + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; @@ -1084,6 +1125,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0fa7abd1e127337ce27a2ded4a5a14..5c72462d1f57e3625f3b8271524a6e3f3f718012 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { @@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 21829439e68671b5ef3a7e0b4e373aaf6191d55b..72090f52fb850fedb0702069ba662cf9f586998a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3315,11 +3315,7 @@ static void remove_from_engine(struct i915_request *rq) static bool can_preempt(struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) > 8) - return true; - - /* GPGPU on bdw requires extra w/a; not implemented */ - return engine->class != RENDER_CLASS; + return GRAPHICS_VER(engine->i915) > 8; } static void kick_execlists(const struct i915_request *rq, int prio) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b1cd4c7a525f4b53049936f7c31c8a0da8d8509..025a79fe5920e428882d347bc169fb27e48cf409 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2748,26 +2748,6 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, return 0; } -static int -gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config, - struct i915_active *active) -{ - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE), - CTX_R_PWR_CLK_STATE, - }, - }; - - if (stream->engine->class != RENDER_CLASS) - return 0; - - return oa_configure_all_contexts(stream, - regs, ARRAY_SIZE(regs), - active); -} - static int lrc_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config, @@ -2874,7 +2854,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; - struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; u32 sqcnt1; @@ -2918,15 +2897,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); - /* - * Update all contexts prior writing the mux configurations as we need - * to make sure all slices/subslices are ON before writing to NOA - * registers. - */ - ret = gen12_configure_all_contexts(stream, oa_config, active); - if (ret) - return ret; - /* * For Gen12, performance counters are context * saved/restored. Only enable it for the context that @@ -2980,9 +2950,6 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING)); } - /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL, NULL); - /* disable the context save/restore or OAR counters */ if (stream->ctx) gen12_configure_oar_context(stream, NULL); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 172dfa7c3588be0c850da824e139cc1215c8730b..d40ee1b42110a336df00f18cc957cde93fea2582 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -368,8 +368,10 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) goto out_cleanup; } - mem->id = i; - i915->mm.regions[i] = mem; + if (mem) { /* Skip on non-fatal errors */ + mem->id = i; + i915->mm.regions[i] = mem; + } } for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e61d4ca463dcd1b7d249e836a6a4b..77b50c56c124ce48831aa2839c9fc45548a86e37 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0712d0b15170260bf53e63831b9638ccab8bd6db..70fb003a66669a1638b22514e7d385494a5d73cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -898,7 +898,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, * Without this the operation can timeout and we'll fallback to a * software copy, which might take several minutes to finish. */ - nouveau_fence_wait(fence, false); + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, new_reg); nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 66fca95c10c761ea597bbb742fc97b2bcd577511..7c97b288680760819971a7823221da5e7179c7ab 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) ret = nouveau_fence_new(&fence, chan); if (!ret) { - ret = nouveau_fence_wait(fence, false); + ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 6719353e2e13f997cdf9732fc6054b75f49f97ea..6fb65b01d778049f9166681e26a7fdf85891e475 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { if (fence) { - nouveau_fence_wait(*fence, false); + nouveau_fence_wait(*fence, true, false); nouveau_fence_unref(fence); } else { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ba469767a20fd9d203c7e2eac56250c80d1de1e8..93f08f9479d89bfda87fbeef246c9dd702f047a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -311,11 +311,39 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) return timeout - t; } +static int +nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) +{ + int ret = 0; + + while (!nouveau_fence_done(fence)) { + if (time_after_eq(jiffies, fence->timeout)) { + ret = -EBUSY; + break; + } + + __set_current_state(intr ? + TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + + __set_current_state(TASK_RUNNING); + return ret; +} + int -nouveau_fence_wait(struct nouveau_fence *fence, bool intr) +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) { long ret; + if (!lazy) + return nouveau_fence_wait_busy(fence, intr); + ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 1b63197b744a207030b4e0b9235c72cfbbc93866..8bc065acfe35870f62bd0f2e37df47a35eb8ae38 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -int nouveau_fence_wait(struct nouveau_fence *, bool intr); +int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); struct nouveau_fence_chan { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2e535caa7d6ea2ecc2582755fb1c271f99b94346..5a887d67dc0e8c71cf1987acb68f48f4bbf05d70 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -928,7 +928,7 @@ revalidate: } if (sync) { - if (!(ret = nouveau_fence_wait(fence, false))) { + if (!(ret = nouveau_fence_wait(fence, false, false))) { if ((ret = dma_fence_get_status(&fence->base)) == 1) ret = 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index b58ab595faf8278c661ce2c9988c09c98f47a515..cd95446d6851113995f8723cdd3a8a022d05f356 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -64,7 +64,8 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, * to the caller, instead of a normal nouveau_bo ttm reference. */ ret = drm_gem_object_init(dev, &nvbo->bo.base, size); if (ret) { - nouveau_bo_ref(NULL, &nvbo); + drm_gem_object_release(&nvbo->bo.base); + kfree(nvbo); obj = ERR_PTR(-ENOMEM); goto unlock; } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 9402fa320a7e9d28380da59aef38127b284d696f..48f105239f42d8ffa3cefd253bd83d52dbb3255f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1803,6 +1803,7 @@ nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0); return nouveau_bo_validate(nvbo, true, false); } diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index 3f7139e211d21b955f11720164e0b1b2a93117e3..64e440a2649b01cb1521dc7dd73b011464ac7061 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_OMAP tristate "OMAP DRM" + depends on MMU depends on DRM && OF depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB) select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 1fe6e0d883c79be07817a3cf582f128c9d5d97dd..e5577d2a19ef497b9da601a42dc241f9a460dd6a 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -33,8 +33,10 @@ #include "evergreen_reg_safe.h" #include "cayman_reg_safe.h" +#ifndef MIN #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif #define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 2241e53a29469090a85891d4ae7f86387c2c0c9b..dec6913cec5b59077123a33e8c886c3fe688ea54 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index c3758faa1b83a36cf76448c339396744109f77ea..d8d0e4d1682fcad3d2156562e52eca89dc4b5e74 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -102,6 +102,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test) sg_init_one(sgt->sgl, buf, TEST_SIZE); + /* + * Set the DMA mask to 64-bits and map the sgtables + * otherwise drm_gem_shmem_free will cause a warning + * on debug kernels. + */ + ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64)); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + /* Init a mock DMA-BUF */ buf_mock.size = TEST_SIZE; attach_mock.dmabuf = &buf_mock; diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index a47f00b443d3095bc6a0fbc7533adaf437084c9f..5982941d933b2730b5fcad58f15651b9cbeaa5b6 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -265,7 +265,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) struct v3d_dev *v3d; int ret; u32 mmu_debug; - u32 ident1; + u32 ident1, ident3; u64 mask; v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm); @@ -298,6 +298,9 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ + ident3 = V3D_READ(V3D_HUB_IDENT3); + v3d->rev = V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV); + if (v3d->ver >= 71) v3d->max_counters = V3D_V71_NUM_PERFCOUNTERS; else if (v3d->ver >= 42) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 099b962bdfde3a878bda56c509456457539c69ca..a0febdb6f21452d97ec44aba8ea07a371aa3edca 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -98,10 +98,12 @@ struct v3d_perfmon { struct v3d_dev { struct drm_device drm; - /* Short representation (e.g. 33, 41) of the V3D tech version - * and revision. - */ + /* Short representation (e.g. 33, 41) of the V3D tech version */ int ver; + + /* Short representation (e.g. 5, 6) of the V3D tech revision */ + int rev; + bool single_irq_line; /* Different revisions of V3D have different total number of performance @@ -563,6 +565,10 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); /* v3d_sched.c */ +void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count); +void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count); void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue); int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 03df37a3acf5c52bba205b186ec87954d5870e15..b8682818bafa6cd9b39919447d4ac3ee64ceb625 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -73,24 +73,44 @@ v3d_sched_job_free(struct drm_sched_job *sched_job) v3d_job_cleanup(job); } +void +v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + +void +v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + static void v3d_cpu_job_free(struct drm_sched_job *sched_job) { struct v3d_cpu_job *job = to_cpu_job(sched_job); - struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; - struct v3d_performance_query_info *performance_query = &job->performance_query; - if (timestamp_query->queries) { - for (int i = 0; i < timestamp_query->count; i++) - drm_syncobj_put(timestamp_query->queries[i].syncobj); - kvfree(timestamp_query->queries); - } + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); - if (performance_query->queries) { - for (int i = 0; i < performance_query->count; i++) - drm_syncobj_put(performance_query->queries[i].syncobj); - kvfree(performance_query->queries); - } + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); v3d_job_cleanup(&job->base); } @@ -295,7 +315,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -315,9 +335,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); @@ -331,7 +359,8 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); struct drm_v3d_submit_csd *args = &indirect_csd->job->args; - u32 *wg_counts; + struct v3d_dev *v3d = job->base.v3d; + u32 num_batches, *wg_counts; v3d_get_bo_vaddr(bo); v3d_get_bo_vaddr(indirect); @@ -344,8 +373,17 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; + + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * + (wg_counts[0] * wg_counts[1] * wg_counts[2]); + + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) + args->cfg[4] = num_batches - 1; + else + args->cfg[4] = num_batches; + + WARN_ON(args->cfg[4] == ~0); for (int i = 0; i < 3; i++) { /* 0xffffffff indicates that the uniform rewrite is not needed */ diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 88f63d526b22365b42b90e90d5b451a56e3fda52..4cdfabbf4964f9b9a71343d3f97b466781773bef 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -452,6 +452,8 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_timestamp_query timestamp; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -480,26 +482,34 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, offsets = u64_to_user_ptr(timestamp.offsets); syncs = u64_to_user_ptr(timestamp.syncs); - for (int i = 0; i < timestamp.count; i++) { + for (i = 0; i < timestamp.count; i++) { u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = timestamp.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -509,6 +519,8 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, { u32 __user *syncs; struct drm_v3d_reset_timestamp_query reset; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -533,21 +545,29 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; job->timestamp_query.queries[i].offset = reset.offset + 8 * i; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = reset.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } /* Get data for the copy timestamp query results job submission. */ @@ -558,7 +578,8 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_copy_timestamp_query copy; - int i; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -591,18 +612,22 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = copy.count; @@ -613,6 +638,10 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -623,6 +652,8 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_reset_performance_query reset; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -637,6 +668,9 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, if (copy_from_user(&reset, ext, sizeof(reset))) return -EFAULT; + if (reset.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(reset.count, @@ -648,39 +682,47 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < reset.nperfmons; j++) { + for (j = 0; j < reset.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = reset.count; job->performance_query.nperfmons = reset.nperfmons; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } static int @@ -691,6 +733,8 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_copy_performance_query copy; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -708,6 +752,9 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, if (copy.pad) return -EINVAL; + if (copy.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(copy.count, @@ -719,34 +766,38 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(copy.syncs); kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); - for (int i = 0; i < copy.count; i++) { + for (i = 0; i < copy.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < copy.nperfmons; j++) { + for (j = 0; j < copy.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = copy.count; job->performance_query.nperfmons = copy.nperfmons; @@ -759,6 +810,10 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 1c7c7f61a222895dc5bcef38d5f82132e818c9a1..7d34cf83f5f2bb6a87612dc277da54750d63371c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -48,7 +48,7 @@ struct virtio_gpu_submit { static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit, struct dma_fence *in_fence) { - u32 context = submit->fence_ctx + submit->ring_idx; + u64 context = submit->fence_ctx + submit->ring_idx; if (dma_fence_match_context(in_fence, context)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h index b0d87c5f58d8ed711d2c90965e74bbb9ad3101f1..1ac3cb151b117e8dca872290979b94d0ac3ad51d 100644 --- a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h +++ b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT + * + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -31,6 +33,10 @@ #include +#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) ((svga3d_flags) >> 32) +#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ + ((svga3d_flags) & ((uint64_t)U32_MAX)) + static inline u32 clamped_umul32(u32 a, u32 b) { uint64_t tmp = (uint64_t) a*b; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 00144632c600ebb6a63ab80c55eae90eed95375f..f42ebc4a7c22552ef0f518390bd725201ea6c34e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright © 2011-2023 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -28,15 +28,39 @@ #include "vmwgfx_bo.h" #include "vmwgfx_drv.h" - +#include "vmwgfx_resource_priv.h" #include static void vmw_bo_release(struct vmw_bo *vbo) { + struct vmw_resource *res; + WARN_ON(vbo->tbo.base.funcs && kref_read(&vbo->tbo.base.refcount) != 0); vmw_bo_unmap(vbo); + + xa_destroy(&vbo->detached_resources); + WARN_ON(vbo->is_dumb && !vbo->dumb_surface); + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + WARN_ON(vbo != res->guest_memory_bo); + WARN_ON(!res->guest_memory_bo); + if (res->guest_memory_bo) { + /* Reserve and switch the backing mob. */ + mutex_lock(&res->dev_priv->cmdbuf_mutex); + (void)vmw_resource_reserve(res, false, true); + vmw_resource_mob_detach(res); + if (res->coherent) + vmw_bo_dirty_release(res->guest_memory_bo); + res->guest_memory_bo = NULL; + res->guest_memory_offset = 0; + vmw_resource_unreserve(res, false, false, false, NULL, + 0); + mutex_unlock(&res->dev_priv->cmdbuf_mutex); + } + vmw_surface_unreference(&vbo->dumb_surface); + } drm_gem_object_release(&vbo->tbo.base); } @@ -325,6 +349,11 @@ void vmw_bo_pin_reserved(struct vmw_bo *vbo, bool pin) * */ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) +{ + return vmw_bo_map_and_cache_size(vbo, vbo->tbo.base.size); +} + +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) { struct ttm_buffer_object *bo = &vbo->tbo; bool not_used; @@ -335,9 +364,10 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) if (virtual) return virtual; - ret = ttm_bo_kmap(bo, 0, PFN_UP(bo->base.size), &vbo->map); + ret = ttm_bo_kmap(bo, 0, PFN_UP(size), &vbo->map); if (ret) - DRM_ERROR("Buffer object map failed: %d.\n", ret); + DRM_ERROR("Buffer object map failed: %d (size: bo = %zu, map = %zu).\n", + ret, bo->base.size, size); return ttm_kmap_obj_virtual(&vbo->map, ¬_used); } @@ -390,6 +420,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; + xa_init(&vmw_bo->detached_resources); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); @@ -654,52 +685,6 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, dma_fence_put(&fence->base); } - -/** - * vmw_dumb_create - Create a dumb kms buffer - * - * @file_priv: Pointer to a struct drm_file identifying the caller. - * @dev: Pointer to the drm device. - * @args: Pointer to a struct drm_mode_create_dumb structure - * Return: Zero on success, negative error code on failure. - * - * This is a driver callback for the core drm create_dumb functionality. - * Note that this is very similar to the vmw_bo_alloc ioctl, except - * that the arguments have a different format. - */ -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_bo *vbo; - int cpp = DIV_ROUND_UP(args->bpp, 8); - int ret; - - switch (cpp) { - case 1: /* DRM_FORMAT_C8 */ - case 2: /* DRM_FORMAT_RGB565 */ - case 4: /* DRM_FORMAT_XRGB8888 */ - break; - default: - /* - * Dumb buffers don't allow anything else. - * This is tested via IGT's dumb_buffers - */ - return -EINVAL; - } - - args->pitch = args->width * cpp; - args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); - - ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, - args->size, &args->handle, - &vbo); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put(&vbo->tbo.base); - return ret; -} - /** * vmw_bo_swap_notify - swapout notify callback. * @@ -853,3 +838,43 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } + +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); +} + +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_erase(&vbo->detached_resources, (unsigned long)res); +} + +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo) +{ + unsigned long index; + struct vmw_resource *res = NULL; + struct vmw_surface *surf = NULL; + struct rb_node *rb_itr = vbo->res_tree.rb_node; + + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + goto out; + } + + xa_for_each(&vbo->detached_resources, index, res) { + if (res->func->res_type == vmw_res_surface) + goto out; + } + + for (rb_itr = rb_first(&vbo->res_tree); rb_itr; + rb_itr = rb_next(rb_itr)) { + res = rb_entry(rb_itr, struct vmw_resource, mob_node); + if (res->func->res_type == vmw_res_surface) + goto out; + } + +out: + if (res) + surf = vmw_res_to_srf(res); + return surf; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index f349642e6190d6933031d08ccd7f353231f0f1da..62b4342d5f7c5053aaca65487e2607c8ea6949a1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2023-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ #include #include +#include struct vmw_bo_dirty; struct vmw_fence_obj; struct vmw_private; struct vmw_resource; +struct vmw_surface; enum vmw_bo_domain { VMW_BO_DOMAIN_SYS = BIT(0), @@ -85,11 +88,15 @@ struct vmw_bo { struct rb_root res_tree; u32 res_prios[TTM_MAX_BO_PRIORITY]; + struct xarray detached_resources; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; struct vmw_bo_dirty *dirty; + + bool is_dumb; + struct vmw_surface *dumb_surface; }; void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain); @@ -124,15 +131,21 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence); void *vmw_bo_map_and_cache(struct vmw_bo *vbo); +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size); void vmw_bo_unmap(struct vmw_bo *vbo); void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); + int vmw_user_bo_lookup(struct drm_file *filp, u32 handle, struct vmw_bo **out); + /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority * according to attached resources diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a1ce41e1c4684ea41c418265d1e35f7b6fbba54b..32f50e59580972dac68b4212c0a15d1c3eebd90e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -762,6 +763,26 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv, int gmr_id); extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); +/** + * User handles + */ +struct vmw_user_object { + struct vmw_surface *surface; + struct vmw_bo *buffer; +}; + +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, + u32 handle, struct vmw_user_object *uo); +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo); +void vmw_user_object_unref(struct vmw_user_object *uo); +bool vmw_user_object_is_null(struct vmw_user_object *uo); +struct vmw_surface *vmw_user_object_surface(struct vmw_user_object *uo); +struct vmw_bo *vmw_user_object_buffer(struct vmw_user_object *uo); +void *vmw_user_object_map(struct vmw_user_object *uo); +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size); +void vmw_user_object_unmap(struct vmw_user_object *uo); +bool vmw_user_object_is_mapped(struct vmw_user_object *uo); + /** * Resource utilities - vmwgfx_resource.c */ @@ -776,11 +797,6 @@ extern int vmw_resource_validate(struct vmw_resource *res, bool intr, extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); -extern int vmw_user_lookup_handle(struct vmw_private *dev_priv, - struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf); extern int vmw_user_resource_lookup_handle( struct vmw_private *dev_priv, struct ttm_object_file *tfile, @@ -1057,9 +1073,6 @@ int vmw_kms_suspend(struct drm_device *dev); int vmw_kms_resume(struct drm_device *dev); void vmw_kms_lost_device(struct drm_device *dev); -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); @@ -1176,6 +1189,15 @@ extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev, int vmw_gb_surface_define(struct vmw_private *dev_priv, const struct vmw_surface_metadata *req, struct vmw_surface **srf_out); +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); /* * Shader management - vmwgfx_shader.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 5efc6a766f64e467a68223376b2a97aa62b278e6..588d50ababf604933d68877c516441900dc1f831 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -32,7 +32,6 @@ #define VMW_FENCE_WRAP (1 << 31) struct vmw_fence_manager { - int num_fence_objects; struct vmw_private *dev_priv; spinlock_t lock; struct list_head fence_list; @@ -124,13 +123,13 @@ static void vmw_fence_obj_destroy(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); - struct vmw_fence_manager *fman = fman_from_fence(fence); - spin_lock(&fman->lock); - list_del_init(&fence->head); - --fman->num_fence_objects; - spin_unlock(&fman->lock); + if (!list_empty(&fence->head)) { + spin_lock(&fman->lock); + list_del_init(&fence->head); + spin_unlock(&fman->lock); + } fence->destroy(fence); } @@ -257,7 +256,6 @@ static const struct dma_fence_ops vmw_fence_ops = { .release = vmw_fence_obj_destroy, }; - /* * Execute signal actions on fences recently signaled. * This is done from a workqueue so we don't have to execute @@ -355,7 +353,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, goto out_unlock; } list_add_tail(&fence->head, &fman->fence_list); - ++fman->num_fence_objects; out_unlock: spin_unlock(&fman->lock); @@ -403,7 +400,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, u32 passed_seqno) { u32 goal_seqno; - struct vmw_fence_obj *fence; + struct vmw_fence_obj *fence, *next_fence; if (likely(!fman->seqno_valid)) return false; @@ -413,7 +410,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, return false; fman->seqno_valid = false; - list_for_each_entry(fence, &fman->fence_list, head) { + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; vmw_fence_goal_write(fman->dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 07185c108218fc3b66f7daf620eaaec617138298..b9857f37ca1ac6f81b6285270a63c4de9a0b6f2e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* - * Copyright 2021-2023 VMware, Inc. + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -78,6 +79,59 @@ static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj) return drm_prime_pages_to_sg(obj->dev, vmw_tt->dma_ttm.pages, vmw_tt->dma_ttm.num_pages); } +static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + int ret; + + if (obj->import_attach) { + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); + if (!ret) { + if (drm_WARN_ON(obj->dev, map->is_iomem)) { + dma_buf_vunmap(obj->import_attach->dmabuf, map); + return -EIO; + } + } + } else { + ret = ttm_bo_vmap(bo, map); + } + + return ret; +} + +static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + if (obj->import_attach) + dma_buf_vunmap(obj->import_attach->dmabuf, map); + else + drm_gem_ttm_vunmap(obj, map); +} + +static int vmw_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + if (obj->import_attach) { + /* + * Reset both vm_ops and vm_private_data, so we don't end up with + * vm_ops pointing to our implementation if the dma-buf backend + * doesn't set those fields. + */ + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + + ret = dma_buf_mmap(obj->dma_buf, vma, 0); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + if (!ret) + drm_gem_object_put(obj); + + return ret; + } + + return drm_gem_ttm_mmap(obj, vma); +} + static const struct vm_operations_struct vmw_vm_ops = { .pfn_mkwrite = vmw_bo_vm_mkwrite, .page_mkwrite = vmw_bo_vm_mkwrite, @@ -94,9 +148,9 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = { .pin = vmw_gem_object_pin, .unpin = vmw_gem_object_unpin, .get_sg_table = vmw_gem_object_get_sg_table, - .vmap = drm_gem_ttm_vmap, - .vunmap = drm_gem_ttm_vunmap, - .mmap = drm_gem_ttm_mmap, + .vmap = vmw_gem_vmap, + .vunmap = vmw_gem_vunmap, + .mmap = vmw_gem_mmap, .vm_ops = &vmw_vm_ops, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 00c4ff684130129d6765dc6457233c32bc4b4228..288ed0bb75cb986c94086323202f18dad3bdb3ee 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -193,13 +194,16 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h) */ static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps) { - if (vps->surf) { - if (vps->surf_mapped) - return vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - return vps->surf->snooper.image; - } else if (vps->bo) - return vmw_bo_map_and_cache(vps->bo); - return NULL; + struct vmw_surface *surf; + + if (vmw_user_object_is_null(&vps->uo)) + return NULL; + + surf = vmw_user_object_surface(&vps->uo); + if (surf && !vmw_user_object_is_mapped(&vps->uo)) + return surf->snooper.image; + + return vmw_user_object_map(&vps->uo); } static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, @@ -536,22 +540,16 @@ void vmw_du_primary_plane_destroy(struct drm_plane *plane) * vmw_du_plane_unpin_surf - unpins resource associated with a framebuffer surface * * @vps: plane state associated with the display surface - * @unreference: true if we also want to unreference the display. */ -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference) +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps) { - if (vps->surf) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + + if (surf) { if (vps->pinned) { - vmw_resource_unpin(&vps->surf->res); + vmw_resource_unpin(&surf->res); vps->pinned--; } - - if (unreference) { - if (vps->pinned) - DRM_ERROR("Surface still pinned\n"); - vmw_surface_unreference(&vps->surf); - } } } @@ -572,7 +570,7 @@ vmw_du_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - vmw_du_plane_unpin_surf(vps, false); + vmw_du_plane_unpin_surf(vps); } @@ -661,25 +659,14 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane, struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } + if (!vmw_user_object_is_null(&vps->uo)) + vmw_user_object_unmap(&vps->uo); vmw_du_cursor_plane_unmap_cm(vps); vmw_du_put_cursor_mob(vcp, vps); - vmw_du_plane_unpin_surf(vps, false); - - if (vps->surf) { - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; - } + vmw_du_plane_unpin_surf(vps); + vmw_user_object_unref(&vps->uo); } @@ -698,64 +685,48 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane, struct drm_framebuffer *fb = new_state->fb; struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); + struct vmw_bo *bo = NULL; int ret = 0; - if (vps->surf) { - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; + if (!vmw_user_object_is_null(&vps->uo)) { + vmw_user_object_unmap(&vps->uo); + vmw_user_object_unref(&vps->uo); } if (fb) { if (vmw_framebuffer_to_vfb(fb)->bo) { - vps->bo = vmw_framebuffer_to_vfbd(fb)->buffer; - vmw_bo_reference(vps->bo); + vps->uo.buffer = vmw_framebuffer_to_vfbd(fb)->buffer; + vps->uo.surface = NULL; } else { - vps->surf = vmw_framebuffer_to_vfbs(fb)->surface; - vmw_surface_reference(vps->surf); + memcpy(&vps->uo, &vmw_framebuffer_to_vfbs(fb)->uo, sizeof(vps->uo)); } + vmw_user_object_ref(&vps->uo); } - if (!vps->surf && vps->bo) { - const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { + struct ttm_operation_ctx ctx = {false, false}; - /* - * Not using vmw_bo_map_and_cache() helper here as we need to - * reserve the ttm_buffer_object first which - * vmw_bo_map_and_cache() omits. - */ - ret = ttm_bo_reserve(&vps->bo->tbo, true, false, NULL); - - if (unlikely(ret != 0)) + ret = ttm_bo_reserve(&bo->tbo, true, false, NULL); + if (ret != 0) return -ENOMEM; - ret = ttm_bo_kmap(&vps->bo->tbo, 0, PFN_UP(size), &vps->bo->map); - - ttm_bo_unreserve(&vps->bo->tbo); - - if (unlikely(ret != 0)) + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret != 0) return -ENOMEM; - } else if (vps->surf && !vps->bo && vps->surf->res.guest_memory_bo) { - WARN_ON(vps->surf->snooper.image); - ret = ttm_bo_reserve(&vps->surf->res.guest_memory_bo->tbo, true, false, - NULL); - if (unlikely(ret != 0)) - return -ENOMEM; - vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - ttm_bo_unreserve(&vps->surf->res.guest_memory_bo->tbo); - vps->surf_mapped = true; + vmw_bo_pin_reserved(bo, true); + if (vmw_framebuffer_to_vfb(fb)->bo) { + const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + + (void)vmw_bo_map_and_cache_size(bo, size); + } else { + vmw_bo_map_and_cache(bo); + } + ttm_bo_unreserve(&bo->tbo); } - if (vps->surf || vps->bo) { + if (!vmw_user_object_is_null(&vps->uo)) { vmw_du_get_cursor_mob(vcp, vps); vmw_du_cursor_plane_map_cm(vps); } @@ -777,14 +748,17 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); + struct vmw_bo *old_bo = NULL; + struct vmw_bo *new_bo = NULL; s32 hotspot_x, hotspot_y; + int ret; hotspot_x = du->hotspot_x + new_state->hotspot_x; hotspot_y = du->hotspot_y + new_state->hotspot_y; - du->cursor_surface = vps->surf; + du->cursor_surface = vmw_user_object_surface(&vps->uo); - if (!vps->surf && !vps->bo) { + if (vmw_user_object_is_null(&vps->uo)) { vmw_cursor_update_position(dev_priv, false, 0, 0); return; } @@ -792,10 +766,26 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, vps->cursor.hotspot_x = hotspot_x; vps->cursor.hotspot_y = hotspot_y; - if (vps->surf) { + if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + + if (!vmw_user_object_is_null(&old_vps->uo)) { + old_bo = vmw_user_object_buffer(&old_vps->uo); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + if (ret != 0) + return; } + if (!vmw_user_object_is_null(&vps->uo)) { + new_bo = vmw_user_object_buffer(&vps->uo); + if (old_bo != new_bo) { + ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); + if (ret != 0) + return; + } else { + new_bo = NULL; + } + } if (!vmw_du_cursor_plane_has_changed(old_vps, vps)) { /* * If it hasn't changed, avoid making the device do extra @@ -813,6 +803,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + if (new_bo) + ttm_bo_unreserve(&new_bo->tbo); + du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; @@ -913,7 +908,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, } if (!vmw_framebuffer_to_vfb(fb)->bo) { - surface = vmw_framebuffer_to_vfbs(fb)->surface; + surface = vmw_user_object_surface(&vmw_framebuffer_to_vfbs(fb)->uo); WARN_ON(!surface); @@ -1074,12 +1069,7 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) memset(&vps->cursor, 0, sizeof(vps->cursor)); /* Each ref counted resource needs to be acquired again */ - if (vps->surf) - (void) vmw_surface_reference(vps->surf); - - if (vps->bo) - (void) vmw_bo_reference(vps->bo); - + vmw_user_object_ref(&vps->uo); state = &vps->base; __drm_atomic_helper_plane_duplicate_state(plane, state); @@ -1128,11 +1118,7 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(state); /* Should have been freed by cleanup_fb */ - if (vps->surf) - vmw_surface_unreference(&vps->surf); - - if (vps->bo) - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); drm_atomic_helper_plane_destroy_state(plane, state); } @@ -1227,7 +1213,7 @@ static void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer) vmw_framebuffer_to_vfbs(framebuffer); drm_framebuffer_cleanup(framebuffer); - vmw_surface_unreference(&vfbs->surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); } @@ -1272,29 +1258,41 @@ int vmw_kms_readback(struct vmw_private *dev_priv, return -ENOSYS; } +static int vmw_framebuffer_surface_create_handle(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) +{ + struct vmw_framebuffer_surface *vfbs = vmw_framebuffer_to_vfbs(fb); + struct vmw_bo *bo = vmw_user_object_buffer(&vfbs->uo); + + return drm_gem_handle_create(file_priv, &bo->tbo.base, handle); +} static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { + .create_handle = vmw_framebuffer_surface_create_handle, .destroy = vmw_framebuffer_surface_destroy, .dirty = drm_atomic_helper_dirtyfb, }; static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, - struct vmw_surface *surface, + struct vmw_user_object *uo, struct vmw_framebuffer **out, const struct drm_mode_fb_cmd2 - *mode_cmd, - bool is_bo_proxy) + *mode_cmd) { struct drm_device *dev = &dev_priv->drm; struct vmw_framebuffer_surface *vfbs; enum SVGA3dSurfaceFormat format; + struct vmw_surface *surface; int ret; /* 3D is only supported on HWv8 and newer hosts */ if (dev_priv->active_display_unit == vmw_du_legacy) return -ENOSYS; + surface = vmw_user_object_surface(uo); + /* * Sanity checks. */ @@ -1357,8 +1355,8 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, } drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd); - vfbs->surface = vmw_surface_reference(surface); - vfbs->is_bo_proxy = is_bo_proxy; + memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); + vmw_user_object_ref(&vfbs->uo); *out = &vfbs->base; @@ -1370,7 +1368,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, return 0; out_err2: - vmw_surface_unreference(&surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); out_err1: return ret; @@ -1386,7 +1384,6 @@ static int vmw_framebuffer_bo_create_handle(struct drm_framebuffer *fb, { struct vmw_framebuffer_bo *vfbd = vmw_framebuffer_to_vfbd(fb); - return drm_gem_handle_create(file_priv, &vfbd->buffer->tbo.base, handle); } @@ -1407,86 +1404,6 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { .dirty = drm_atomic_helper_dirtyfb, }; -/** - * vmw_create_bo_proxy - create a proxy surface for the buffer object - * - * @dev: DRM device - * @mode_cmd: parameters for the new surface - * @bo_mob: MOB backing the buffer object - * @srf_out: newly created surface - * - * When the content FB is a buffer object, we create a surface as a proxy to the - * same buffer. This way we can do a surface copy rather than a surface DMA. - * This is a more efficient approach - * - * RETURNS: - * 0 on success, error code otherwise - */ -static int vmw_create_bo_proxy(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct vmw_bo *bo_mob, - struct vmw_surface **srf_out) -{ - struct vmw_surface_metadata metadata = {0}; - uint32_t format; - struct vmw_resource *res; - unsigned int bytes_pp; - int ret; - - switch (mode_cmd->pixel_format) { - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - format = SVGA3D_X8R8G8B8; - bytes_pp = 4; - break; - - case DRM_FORMAT_RGB565: - case DRM_FORMAT_XRGB1555: - format = SVGA3D_R5G6B5; - bytes_pp = 2; - break; - - case 8: - format = SVGA3D_P8; - bytes_pp = 1; - break; - - default: - DRM_ERROR("Invalid framebuffer format %p4cc\n", - &mode_cmd->pixel_format); - return -EINVAL; - } - - metadata.format = format; - metadata.mip_levels[0] = 1; - metadata.num_sizes = 1; - metadata.base_size.width = mode_cmd->pitches[0] / bytes_pp; - metadata.base_size.height = mode_cmd->height; - metadata.base_size.depth = 1; - metadata.scanout = true; - - ret = vmw_gb_surface_define(vmw_priv(dev), &metadata, srf_out); - if (ret) { - DRM_ERROR("Failed to allocate proxy content buffer\n"); - return ret; - } - - res = &(*srf_out)->res; - - /* Reserve and switch the backing mob. */ - mutex_lock(&res->dev_priv->cmdbuf_mutex); - (void) vmw_resource_reserve(res, false, true); - vmw_user_bo_unref(&res->guest_memory_bo); - res->guest_memory_bo = vmw_user_bo_ref(bo_mob); - res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, 0); - mutex_unlock(&res->dev_priv->cmdbuf_mutex); - - return 0; -} - - - static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, struct vmw_bo *bo, struct vmw_framebuffer **out, @@ -1565,55 +1482,24 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height) * vmw_kms_new_framebuffer - Create a new framebuffer. * * @dev_priv: Pointer to device private struct. - * @bo: Pointer to buffer object to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @surface: Pointer to a surface to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @only_2d: No presents will occur to this buffer object based framebuffer. - * This helps the code to do some important optimizations. + * @uo: Pointer to user object to wrap the kms framebuffer around. + * Either the buffer or surface inside the user object must be NULL. * @mode_cmd: Frame-buffer metadata. */ struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_framebuffer *vfb = NULL; - bool is_bo_proxy = false; int ret; - /* - * We cannot use the SurfaceDMA command in an non-accelerated VM, - * therefore, wrap the buffer object in a surface so we can use the - * SurfaceCopy command. - */ - if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height) && - bo && only_2d && - mode_cmd->width > 64 && /* Don't create a proxy for cursor */ - dev_priv->active_display_unit == vmw_du_screen_target) { - ret = vmw_create_bo_proxy(&dev_priv->drm, mode_cmd, - bo, &surface); - if (ret) - return ERR_PTR(ret); - - is_bo_proxy = true; - } - /* Create the new framebuffer depending one what we have */ - if (surface) { - ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, - mode_cmd, - is_bo_proxy); - /* - * vmw_create_bo_proxy() adds a reference that is no longer - * needed - */ - if (is_bo_proxy) - vmw_surface_unreference(&surface); - } else if (bo) { - ret = vmw_kms_new_framebuffer_bo(dev_priv, bo, &vfb, + if (vmw_user_object_surface(uo)) { + ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb, + mode_cmd); + } else if (uo->buffer) { + ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb, mode_cmd); } else { BUG(); @@ -1635,14 +1521,12 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, { struct vmw_private *dev_priv = vmw_priv(dev); struct vmw_framebuffer *vfb = NULL; - struct vmw_surface *surface = NULL; - struct vmw_bo *bo = NULL; + struct vmw_user_object uo = {0}; int ret; /* returns either a bo or surface */ - ret = vmw_user_lookup_handle(dev_priv, file_priv, - mode_cmd->handles[0], - &surface, &bo); + ret = vmw_user_object_lookup(dev_priv, file_priv, mode_cmd->handles[0], + &uo); if (ret) { DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", mode_cmd->handles[0], mode_cmd->handles[0]); @@ -1650,7 +1534,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - if (!bo && + if (vmw_user_object_surface(&uo) && !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { DRM_ERROR("Surface size cannot exceed %dx%d\n", dev_priv->texture_max_width, @@ -1659,20 +1543,15 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - vfb = vmw_kms_new_framebuffer(dev_priv, bo, surface, - !(dev_priv->capabilities & SVGA_CAP_3D), - mode_cmd); + vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd); if (IS_ERR(vfb)) { ret = PTR_ERR(vfb); goto err_out; } err_out: - /* vmw_user_lookup_handle takes one ref so does new_fb */ - if (bo) - vmw_user_bo_unref(&bo); - if (surface) - vmw_surface_unreference(&surface); + /* vmw_user_object_lookup takes one ref so does new_fb */ + vmw_user_object_unref(&uo); if (ret) { DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); @@ -2584,72 +2463,6 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, vmw_fence_obj_unreference(&fence); } -/** - * vmw_kms_update_proxy - Helper function to update a proxy surface from - * its backing MOB. - * - * @res: Pointer to the surface resource - * @clips: Clip rects in framebuffer (surface) space. - * @num_clips: Number of clips in @clips. - * @increment: Integer with which to increment the clip counter when looping. - * Used to skip a predetermined number of clip rects. - * - * This function makes sure the proxy surface is updated from its backing MOB - * using the region given by @clips. The surface resource @res and its backing - * MOB needs to be reserved and validated on call. - */ -int vmw_kms_update_proxy(struct vmw_resource *res, - const struct drm_clip_rect *clips, - unsigned num_clips, - int increment) -{ - struct vmw_private *dev_priv = res->dev_priv; - struct drm_vmw_size *size = &vmw_res_to_srf(res)->metadata.base_size; - struct { - SVGA3dCmdHeader header; - SVGA3dCmdUpdateGBImage body; - } *cmd; - SVGA3dBox *box; - size_t copy_size = 0; - int i; - - if (!clips) - return 0; - - cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd) * num_clips); - if (!cmd) - return -ENOMEM; - - for (i = 0; i < num_clips; ++i, clips += increment, ++cmd) { - box = &cmd->body.box; - - cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd->header.size = sizeof(cmd->body); - cmd->body.image.sid = res->id; - cmd->body.image.face = 0; - cmd->body.image.mipmap = 0; - - if (clips->x1 > size->width || clips->x2 > size->width || - clips->y1 > size->height || clips->y2 > size->height) { - DRM_ERROR("Invalid clips outsize of framebuffer.\n"); - return -EINVAL; - } - - box->x = clips->x1; - box->y = clips->y1; - box->z = 0; - box->w = clips->x2 - clips->x1; - box->h = clips->y2 - clips->y1; - box->d = 1; - - copy_size += sizeof(*cmd); - } - - vmw_cmd_commit(dev_priv, copy_size); - - return 0; -} - /** * vmw_kms_create_implicit_placement_property - Set up the implicit placement * property. @@ -2784,8 +2597,9 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) } else { struct vmw_framebuffer_surface *vfbs = container_of(update->vfb, typeof(*vfbs), base); + struct vmw_surface *surf = vmw_user_object_surface(&vfbs->uo); - ret = vmw_validation_add_resource(&val_ctx, &vfbs->surface->res, + ret = vmw_validation_add_resource(&val_ctx, &surf->res, 0, VMW_RES_DIRTY_NONE, NULL, NULL); } @@ -2941,3 +2755,93 @@ int vmw_connector_get_modes(struct drm_connector *connector) return num_modes; } + +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_ref(uo->buffer); + else if (uo->surface) + vmw_surface_reference(uo->surface); + return uo; +} + +void vmw_user_object_unref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_unref(&uo->buffer); + else if (uo->surface) + vmw_surface_unreference(&uo->surface); +} + +struct vmw_bo * +vmw_user_object_buffer(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer; + else if (uo->surface) + return uo->surface->res.guest_memory_bo; + return NULL; +} + +struct vmw_surface * +vmw_user_object_surface(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer->dumb_surface; + return uo->surface; +} + +void *vmw_user_object_map(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache(bo); +} + +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache_size(bo, size); +} + +void vmw_user_object_unmap(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + int ret; + + WARN_ON(!bo); + + /* Fence the mob creation so we are guarateed to have the mob */ + ret = ttm_bo_reserve(&bo->tbo, false, false, NULL); + if (ret != 0) + return; + + vmw_bo_unmap(bo); + vmw_bo_pin_reserved(bo, false); + + ttm_bo_unreserve(&bo->tbo); +} + +bool vmw_user_object_is_mapped(struct vmw_user_object *uo) +{ + struct vmw_bo *bo; + + if (!uo || vmw_user_object_is_null(uo)) + return false; + + bo = vmw_user_object_buffer(uo); + + if (WARN_ON(!bo)) + return false; + + WARN_ON(bo->map.bo && !bo->map.virtual); + return bo->map.virtual; +} + +bool vmw_user_object_is_null(struct vmw_user_object *uo) +{ + return !uo->buffer && !uo->surface; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index bf24f2f0dcfc931fc9788e30c8245f12ed1472e3..6141fadf81efeb2cda7d91274dce16def626e46d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -221,11 +222,9 @@ struct vmw_framebuffer { struct vmw_framebuffer_surface { struct vmw_framebuffer base; - struct vmw_surface *surface; - bool is_bo_proxy; /* true if this is proxy surface for DMA buf */ + struct vmw_user_object uo; }; - struct vmw_framebuffer_bo { struct vmw_framebuffer base; struct vmw_bo *buffer; @@ -277,8 +276,7 @@ struct vmw_cursor_plane_state { */ struct vmw_plane_state { struct drm_plane_state base; - struct vmw_surface *surf; - struct vmw_bo *bo; + struct vmw_user_object uo; int content_fb_type; unsigned long bo_size; @@ -457,9 +455,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, uint32_t num_clips); struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd); void vmw_guess_mode_timing(struct drm_display_mode *mode); void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv); @@ -486,8 +482,7 @@ void vmw_du_plane_reset(struct drm_plane *plane); struct drm_plane_state *vmw_du_plane_duplicate_state(struct drm_plane *plane); void vmw_du_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state); -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference); +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps); int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 5befc2719a4984a0f8d5cb31d6dd855a4f38521e..39949e0a493f3daa88b1f784c3d0f44f894b0557 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -147,8 +148,9 @@ static int vmw_ldu_fb_pin(struct vmw_framebuffer *vfb) struct vmw_bo *buf; int ret; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); if (!buf) return 0; @@ -169,8 +171,10 @@ static int vmw_ldu_fb_unpin(struct vmw_framebuffer *vfb) struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); struct vmw_bo *buf; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); + if (WARN_ON(!buf)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index c45b4724e4141dbfc07df9554c09917b8c9ec9ec..e20f64b67b2669ba8b818a431a57a6a927496732 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -92,7 +92,7 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv, { struct vmw_escape_video_flush *flush; size_t fifo_size; - bool have_so = (dev_priv->active_display_unit == vmw_du_screen_object); + bool have_so = (dev_priv->active_display_unit != vmw_du_legacy); int i, num_items; SVGAGuestPtr ptr; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index c99cad444991579f6e665453b74f56cb35de2e15..598b90ac7590b52b4b0e2db6d7e21ca17564eef4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2013 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2013-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -31,6 +32,7 @@ */ #include "vmwgfx_drv.h" +#include "vmwgfx_bo.h" #include "ttm_object.h" #include @@ -88,13 +90,35 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, uint32_t handle, uint32_t flags, int *prime_fd) { + struct vmw_private *vmw = vmw_priv(dev); struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo; int ret; + int surf_handle; - if (handle > VMWGFX_NUM_MOB) + if (handle > VMWGFX_NUM_MOB) { ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); - else - ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); + } else { + ret = vmw_user_bo_lookup(file_priv, handle, &vbo); + if (ret) + return ret; + if (vbo && vbo->is_dumb) { + ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, + flags, prime_fd); + } else { + surf_handle = vmw_lookup_surface_handle_for_buffer(vmw, + vbo, + handle); + if (surf_handle > 0) + ret = ttm_prime_handle_to_fd(tfile, surf_handle, + flags, prime_fd); + else + ret = drm_gem_prime_handle_to_fd(dev, file_priv, + handle, flags, + prime_fd); + } + vmw_user_bo_unref(&vbo); + } return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 848dba09981b0d102edcc1288974f994e72b505b..a73af8a355fbf5086df5e41f330bd106ab8be61c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -58,6 +59,7 @@ void vmw_resource_mob_attach(struct vmw_resource *res) rb_link_node(&res->mob_node, parent, new); rb_insert_color(&res->mob_node, &gbo->res_tree); + vmw_bo_del_detached_resource(gbo, res); vmw_bo_prio_add(gbo, res->used_prio); } @@ -287,28 +289,35 @@ out_bad_resource: * * The pointer this pointed at by out_surf and out_buf needs to be null. */ -int vmw_user_lookup_handle(struct vmw_private *dev_priv, +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf) + u32 handle, + struct vmw_user_object *uo) { struct ttm_object_file *tfile = vmw_fpriv(filp)->tfile; struct vmw_resource *res; int ret; - BUG_ON(*out_surf || *out_buf); + WARN_ON(uo->surface || uo->buffer); ret = vmw_user_resource_lookup_handle(dev_priv, tfile, handle, user_surface_converter, &res); if (!ret) { - *out_surf = vmw_res_to_srf(res); + uo->surface = vmw_res_to_srf(res); return 0; } - *out_surf = NULL; - ret = vmw_user_bo_lookup(filp, handle, out_buf); + uo->surface = NULL; + ret = vmw_user_bo_lookup(filp, handle, &uo->buffer); + if (!ret && !uo->buffer->is_dumb) { + uo->surface = vmw_lookup_surface_for_buffer(dev_priv, + uo->buffer, + handle); + if (uo->surface) + vmw_user_bo_unref(&uo->buffer); + } + return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index df0039a8ef29a21d7cb6b6063f62a4876a25a446..0f4bfd98480afe0ce116ecba4fa9a37c139ad6cf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2011-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -240,7 +241,7 @@ static void vmw_sou_crtc_mode_set_nofb(struct drm_crtc *crtc) struct vmw_connector_state *vmw_conn_state; int x, y; - sou->buffer = vps->bo; + sou->buffer = vmw_user_object_buffer(&vps->uo); conn_state = sou->base.connector.state; vmw_conn_state = vmw_connector_state_to_vcs(conn_state); @@ -376,10 +377,11 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); struct drm_crtc *crtc = plane->state->crtc ? plane->state->crtc : old_state->crtc; + struct vmw_bo *bo = vmw_user_object_buffer(&vps->uo); - if (vps->bo) - vmw_bo_unpin(vmw_priv(crtc->dev), vps->bo, false); - vmw_bo_unreference(&vps->bo); + if (bo) + vmw_bo_unpin(vmw_priv(crtc->dev), bo, false); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; vmw_du_plane_cleanup_fb(plane, old_state); @@ -411,9 +413,10 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, .bo_type = ttm_bo_type_device, .pin = true }; + struct vmw_bo *bo = NULL; if (!new_fb) { - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; return 0; @@ -422,17 +425,17 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, bo_params.size = new_state->crtc_w * new_state->crtc_h * 4; dev_priv = vmw_priv(crtc->dev); - if (vps->bo) { + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { if (vps->bo_size == bo_params.size) { /* * Note that this might temporarily up the pin-count * to 2, until cleanup_fb() is called. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, - true); + return vmw_bo_pin_in_vram(dev_priv, bo, true); } - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; } @@ -442,7 +445,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * resume the overlays, this is preferred to failing to alloc. */ vmw_overlay_pause_all(dev_priv); - ret = vmw_bo_create(dev_priv, &bo_params, &vps->bo); + ret = vmw_gem_object_create(dev_priv, &bo_params, &vps->uo.buffer); vmw_overlay_resume_all(dev_priv); if (ret) return ret; @@ -453,7 +456,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * TTM already thinks the buffer is pinned, but make sure the * pin_count is upped. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, true); + return vmw_bo_pin_in_vram(dev_priv, vps->uo.buffer, true); } static uint32_t vmw_sou_bo_fifo_size(struct vmw_du_update_plane *update, @@ -580,6 +583,7 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, { struct vmw_kms_sou_dirty_cmd *blit = cmd; struct vmw_framebuffer_surface *vfbs; + struct vmw_surface *surf = NULL; vfbs = container_of(update->vfb, typeof(*vfbs), base); @@ -587,7 +591,8 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, blit->header.size = sizeof(blit->body) + sizeof(SVGASignedRect) * num_hits; - blit->body.srcImage.sid = vfbs->surface->res.id; + surf = vmw_user_object_surface(&vfbs->uo); + blit->body.srcImage.sid = surf->res.id; blit->body.destScreenId = update->du->unit; /* Update the source and destination bounding box later in post_clip */ @@ -1104,7 +1109,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index a04e0736318da6c09876ec4fdfe02ae05c3836a7..5453f7cf0e2d7b05e1020743dc18dcd766617308 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /****************************************************************************** * - * COPYRIGHT (C) 2014-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2014-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -29,6 +30,7 @@ #include "vmwgfx_kms.h" #include "vmwgfx_vkms.h" #include "vmw_surface_cache.h" +#include #include #include @@ -735,7 +737,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); @@ -746,12 +748,6 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, if (ret) goto out_unref; - if (vfbs->is_bo_proxy) { - ret = vmw_kms_update_proxy(srf, clips, num_clips, inc); - if (ret) - goto out_finish; - } - sdirty.base.fifo_commit = vmw_kms_stdu_surface_fifo_commit; sdirty.base.clip = vmw_kms_stdu_surface_clip; sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) + @@ -765,7 +761,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips, dest_x, dest_y, num_clips, inc, &sdirty.base); -out_finish: + vmw_kms_helper_validation_finish(dev_priv, NULL, &val_ctx, out_fence, NULL); @@ -877,6 +873,32 @@ vmw_stdu_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } +/* + * Trigger a modeset if the X,Y position of the Screen Target changes. + * This is needed when multi-mon is cycled. The original Screen Target will have + * the same mode but its relative X,Y position in the topology will change. + */ +static int vmw_stdu_connector_atomic_check(struct drm_connector *conn, + struct drm_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct vmw_screen_target_display_unit *du; + struct drm_crtc_state *new_crtc_state; + + conn_state = drm_atomic_get_connector_state(state, conn); + du = vmw_connector_to_stdu(conn); + + if (!conn_state->crtc) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (du->base.gui_x != du->base.set_gui_x || + du->base.gui_y != du->base.set_gui_y) + new_crtc_state->mode_changed = true; + + return 0; +} + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, @@ -891,7 +913,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { static const struct drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { .get_modes = vmw_connector_get_modes, - .mode_valid = vmw_stdu_connector_mode_valid + .mode_valid = vmw_stdu_connector_mode_valid, + .atomic_check = vmw_stdu_connector_atomic_check, }; @@ -918,9 +941,8 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf) + if (vmw_user_object_surface(&vps->uo)) WARN_ON(!vps->pinned); - vmw_du_plane_cleanup_fb(plane, old_state); vps->content_fb_type = SAME_AS_DISPLAY; @@ -928,7 +950,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, } - /** * vmw_stdu_primary_plane_prepare_fb - Readies the display surface * @@ -952,13 +973,15 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, enum stdu_content_type new_content_type; struct vmw_framebuffer_surface *new_vfbs; uint32_t hdisplay = new_state->crtc_w, vdisplay = new_state->crtc_h; + struct drm_plane_state *old_state = plane->state; + struct drm_rect rect; int ret; /* No FB to prepare */ if (!new_fb) { - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } return 0; @@ -968,8 +991,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, new_vfbs = (vfb->bo) ? NULL : vmw_framebuffer_to_vfbs(new_fb); if (new_vfbs && - new_vfbs->surface->metadata.base_size.width == hdisplay && - new_vfbs->surface->metadata.base_size.height == vdisplay) + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.width == hdisplay && + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.height == vdisplay) new_content_type = SAME_AS_DISPLAY; else if (vfb->bo) new_content_type = SEPARATE_BO; @@ -1007,29 +1030,29 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, metadata.num_sizes = 1; metadata.scanout = true; } else { - metadata = new_vfbs->surface->metadata; + metadata = vmw_user_object_surface(&new_vfbs->uo)->metadata; } metadata.base_size.width = hdisplay; metadata.base_size.height = vdisplay; metadata.base_size.depth = 1; - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { struct drm_vmw_size cur_base_size = - vps->surf->metadata.base_size; + vmw_user_object_surface(&vps->uo)->metadata.base_size; if (cur_base_size.width != metadata.base_size.width || cur_base_size.height != metadata.base_size.height || - vps->surf->metadata.format != metadata.format) { + vmw_user_object_surface(&vps->uo)->metadata.format != metadata.format) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } } - if (!vps->surf) { + if (!vmw_user_object_surface(&vps->uo)) { ret = vmw_gb_surface_define(dev_priv, &metadata, - &vps->surf); + &vps->uo.surface); if (ret != 0) { DRM_ERROR("Couldn't allocate STDU surface.\n"); return ret; @@ -1042,18 +1065,19 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, * The only time we add a reference in prepare_fb is if the * state object doesn't have a reference to begin with */ - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } - vps->surf = vmw_surface_reference(new_vfbs->surface); + memcpy(&vps->uo, &new_vfbs->uo, sizeof(vps->uo)); + vmw_user_object_ref(&vps->uo); } - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { /* Pin new surface before flipping */ - ret = vmw_resource_pin(&vps->surf->res, false); + ret = vmw_resource_pin(&vmw_user_object_surface(&vps->uo)->res, false); if (ret) goto out_srf_unref; @@ -1062,6 +1086,34 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, vps->content_fb_type = new_content_type; + /* + * The drm fb code will do blit's via the vmap interface, which doesn't + * trigger vmw_bo page dirty tracking due to being kernel side (and thus + * doesn't require mmap'ing) so we have to update the surface's dirty + * regions by hand but we want to be careful to not overwrite the + * resource if it has been written to by the gpu (res_dirty). + */ + if (vps->uo.buffer && vps->uo.buffer->is_dumb) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + struct vmw_resource *res = &surf->res; + + if (!res->res_dirty && drm_atomic_helper_damage_merged(old_state, + new_state, + &rect)) { + /* + * At some point it might be useful to actually translate + * (rect.x1, rect.y1) => start, and (rect.x2, rect.y2) => end, + * but currently the fb code will just report the entire fb + * dirty so in practice it doesn't matter. + */ + pgoff_t start = res->guest_memory_offset >> PAGE_SHIFT; + pgoff_t end = __KERNEL_DIV_ROUND_UP(res->guest_memory_offset + + res->guest_memory_size, + PAGE_SIZE); + vmw_resource_dirty_update(res, start, end); + } + } + /* * This should only happen if the buffer object is too large to create a * proxy surface for. @@ -1072,7 +1124,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, return 0; out_srf_unref: - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); return ret; } @@ -1214,14 +1266,8 @@ static uint32_t vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_update); return size; @@ -1230,61 +1276,14 @@ vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_surface_copy) + sizeof(SVGA3dCopyBox) * num_hits + sizeof(struct vmw_stdu_update); return size; } -static uint32_t -vmw_stdu_surface_update_proxy(struct vmw_du_update_plane *update, void *cmd) -{ - struct vmw_framebuffer_surface *vfbs; - struct drm_plane_state *state = update->plane->state; - struct drm_plane_state *old_state = update->old_state; - struct vmw_stdu_update_gb_image *cmd_update = cmd; - struct drm_atomic_helper_damage_iter iter; - struct drm_rect clip; - uint32_t copy_size = 0; - - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - /* - * proxy surface is special where a buffer object type fb is wrapped - * in a surface and need an update gb image command to sync with device. - */ - drm_atomic_helper_damage_iter_init(&iter, old_state, state); - drm_atomic_for_each_plane_damage(&iter, &clip) { - SVGA3dBox *box = &cmd_update->body.box; - - cmd_update->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd_update->header.size = sizeof(cmd_update->body); - cmd_update->body.image.sid = vfbs->surface->res.id; - cmd_update->body.image.face = 0; - cmd_update->body.image.mipmap = 0; - - box->x = clip.x1; - box->y = clip.y1; - box->z = 0; - box->w = drm_rect_width(&clip); - box->h = drm_rect_height(&clip); - box->d = 1; - - copy_size += sizeof(*cmd_update); - cmd_update++; - } - - return copy_size; -} - static uint32_t vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, uint32_t num_hits) @@ -1299,7 +1298,7 @@ vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, cmd_copy->header.id = SVGA_3D_CMD_SURFACE_COPY; cmd_copy->header.size = sizeof(cmd_copy->body) + sizeof(SVGA3dCopyBox) * num_hits; - cmd_copy->body.src.sid = vfbs->surface->res.id; + cmd_copy->body.src.sid = vmw_user_object_surface(&vfbs->uo)->res.id; cmd_copy->body.dest.sid = stdu->display_srf->res.id; return sizeof(*cmd_copy); @@ -1370,10 +1369,7 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv, srf_update.mutex = &dev_priv->cmdbuf_mutex; srf_update.intr = true; - if (vfbs->is_bo_proxy) - srf_update.post_prepare = vmw_stdu_surface_update_proxy; - - if (vfbs->surface->res.id != stdu->display_srf->res.id) { + if (vmw_user_object_surface(&vfbs->uo)->res.id != stdu->display_srf->res.id) { srf_update.calc_fifo_size = vmw_stdu_surface_fifo_size; srf_update.pre_clip = vmw_stdu_surface_populate_copy; srf_update.clip = vmw_stdu_surface_populate_clip; @@ -1417,7 +1413,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu = vmw_crtc_to_stdu(crtc); dev_priv = vmw_priv(crtc->dev); - stdu->display_srf = vps->surf; + stdu->display_srf = vmw_user_object_surface(&vps->uo); stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index e7a744dfcecfd4837a0b50aaa449f329990356c8..8ae6a761c900345d1711e99c298784c1e8aefc71 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -36,9 +37,6 @@ #include #define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32) -#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) (svga3d_flags >> 32) -#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ - (svga3d_flags & ((uint64_t)U32_MAX)) /** * struct vmw_user_surface - User-space visible surface resource @@ -686,6 +684,14 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + + /* + * Dumb buffers own the resource and they'll unref the + * resource themselves + */ + if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) + return; + vmw_resource_unreference(&res); } @@ -812,7 +818,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } } res->guest_memory_size = cur_bo_offset; - if (metadata->scanout && + if (!file_priv->atomic && + metadata->scanout && metadata->num_sizes == 1 && metadata->sizes[0].width == VMW_CURSOR_SNOOP_WIDTH && metadata->sizes[0].height == VMW_CURSOR_SNOOP_HEIGHT && @@ -864,6 +871,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } + vmw_bo_add_detached_resource(res->guest_memory_bo, res); } tmp = vmw_resource_reference(&srf->res); @@ -892,6 +900,113 @@ out_unlock: return ret; } +static struct vmw_user_surface * +vmw_lookup_user_surface_for_buffer(struct vmw_private *vmw, struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = NULL; + struct vmw_surface *surf; + struct ttm_base_object *base; + + surf = vmw_bo_surface(bo); + if (surf) { + rcu_read_lock(); + user_srf = container_of(surf, struct vmw_user_surface, srf); + base = &user_srf->prime.base; + if (base && !kref_get_unless_zero(&base->refcount)) { + drm_dbg_driver(&vmw->drm, + "%s: referencing a stale surface handle %d\n", + __func__, handle); + base = NULL; + user_srf = NULL; + } + rcu_read_unlock(); + } + + return user_srf; +} + +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + struct vmw_surface *surf = NULL; + struct ttm_base_object *base; + + if (user_srf) { + surf = vmw_surface_reference(&user_srf->srf); + base = &user_srf->prime.base; + ttm_base_object_unref(&base); + } + return surf; +} + +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + int surf_handle = 0; + struct ttm_base_object *base; + + if (user_srf) { + base = &user_srf->prime.base; + surf_handle = (u32)base->handle; + ttm_base_object_unref(&base); + } + return surf_handle; +} + +static int vmw_buffer_prime_to_surface_base(struct vmw_private *dev_priv, + struct drm_file *file_priv, + u32 fd, u32 *handle, + struct ttm_base_object **base_p) +{ + struct ttm_base_object *base; + struct vmw_bo *bo; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_user_surface *user_srf; + int ret; + + ret = drm_gem_prime_fd_to_handle(&dev_priv->drm, file_priv, fd, handle); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to find user buffer for fd = %u.\n", fd); + return ret; + } + + ret = vmw_user_bo_lookup(file_priv, *handle, &bo); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to lookup user buffer for handle = %u.\n", *handle); + return ret; + } + + user_srf = vmw_lookup_user_surface_for_buffer(dev_priv, bo, *handle); + if (WARN_ON(!user_srf)) { + drm_warn(&dev_priv->drm, + "User surface fd %d (handle %d) is null.\n", fd, *handle); + ret = -EINVAL; + goto out; + } + + base = &user_srf->prime.base; + ret = ttm_ref_object_add(tfile, base, NULL, false); + if (ret) { + drm_warn(&dev_priv->drm, + "Couldn't add an object ref for the buffer (%d).\n", *handle); + goto out; + } + + *base_p = base; +out: + vmw_user_bo_unref(&bo); + + return ret; +} static int vmw_surface_handle_reference(struct vmw_private *dev_priv, @@ -901,15 +1016,19 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, struct ttm_base_object **base_p) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct vmw_user_surface *user_srf; + struct vmw_user_surface *user_srf = NULL; uint32_t handle; struct ttm_base_object *base; int ret; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); - if (unlikely(ret != 0)) - return ret; + if (ret) + return vmw_buffer_prime_to_surface_base(dev_priv, + file_priv, + u_handle, + &handle, + base_p); } else { handle = u_handle; } @@ -1503,7 +1622,12 @@ vmw_gb_surface_define_internal(struct drm_device *dev, ret = vmw_user_bo_lookup(file_priv, req->base.buffer_handle, &res->guest_memory_bo); if (ret == 0) { - if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { + if (res->guest_memory_bo->is_dumb) { + VMW_DEBUG_USER("Can't backup surface with a dumb buffer.\n"); + vmw_user_bo_unref(&res->guest_memory_bo); + ret = -EINVAL; + goto out_unlock; + } else if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); vmw_user_bo_unref(&res->guest_memory_bo); ret = -EINVAL; @@ -1560,6 +1684,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { + vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; @@ -2100,3 +2225,140 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, out_unlock: return ret; } + +static SVGA3dSurfaceFormat vmw_format_bpp_to_svga(struct vmw_private *vmw, + int bpp) +{ + switch (bpp) { + case 8: /* DRM_FORMAT_C8 */ + return SVGA3D_P8; + case 16: /* DRM_FORMAT_RGB565 */ + return SVGA3D_R5G6B5; + case 32: /* DRM_FORMAT_XRGB8888 */ + if (has_sm4_context(vmw)) + return SVGA3D_B8G8R8X8_UNORM; + return SVGA3D_X8R8G8B8; + default: + drm_warn(&vmw->drm, "Unsupported format bpp: %d\n", bpp); + return SVGA3D_X8R8G8B8; + } +} + +/** + * vmw_dumb_create - Create a dumb kms buffer + * + * @file_priv: Pointer to a struct drm_file identifying the caller. + * @dev: Pointer to the drm device. + * @args: Pointer to a struct drm_mode_create_dumb structure + * Return: Zero on success, negative error code on failure. + * + * This is a driver callback for the core drm create_dumb functionality. + * Note that this is very similar to the vmw_bo_alloc ioctl, except + * that the arguments have a different format. + */ +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo = NULL; + struct vmw_resource *res = NULL; + union drm_vmw_gb_surface_create_ext_arg arg = { 0 }; + struct drm_vmw_gb_surface_create_ext_req *req = &arg.req; + int ret; + struct drm_vmw_size drm_size = { + .width = args->width, + .height = args->height, + .depth = 1, + }; + SVGA3dSurfaceFormat format = vmw_format_bpp_to_svga(dev_priv, args->bpp); + const struct SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format); + SVGA3dSurfaceAllFlags flags = SVGA3D_SURFACE_HINT_TEXTURE | + SVGA3D_SURFACE_HINT_RENDERTARGET | + SVGA3D_SURFACE_SCREENTARGET | + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | + SVGA3D_SURFACE_BIND_RENDER_TARGET; + + /* + * Without mob support we're just going to use raw memory buffer + * because we wouldn't be able to support full surface coherency + * without mobs + */ + if (!dev_priv->has_mob) { + int cpp = DIV_ROUND_UP(args->bpp, 8); + + switch (cpp) { + case 1: /* DRM_FORMAT_C8 */ + case 2: /* DRM_FORMAT_RGB565 */ + case 4: /* DRM_FORMAT_XRGB8888 */ + break; + default: + /* + * Dumb buffers don't allow anything else. + * This is tested via IGT's dumb_buffers + */ + return -EINVAL; + } + + args->pitch = args->width * cpp; + args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); + + ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, + args->size, &args->handle, + &vbo); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->tbo.base); + return ret; + } + + req->version = drm_vmw_gb_surface_v1; + req->multisample_pattern = SVGA3D_MS_PATTERN_NONE; + req->quality_level = SVGA3D_MS_QUALITY_NONE; + req->buffer_byte_stride = 0; + req->must_be_zero = 0; + req->base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(flags); + req->svga3d_flags_upper_32_bits = SVGA3D_FLAGS_UPPER_32(flags); + req->base.format = (uint32_t)format; + req->base.drm_surface_flags = drm_vmw_surface_flag_scanout; + req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable; + req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer; + req->base.drm_surface_flags |= drm_vmw_surface_flag_coherent; + req->base.base_size.width = args->width; + req->base.base_size.height = args->height; + req->base.base_size.depth = 1; + req->base.array_size = 0; + req->base.mip_levels = 1; + req->base.multisample_count = 0; + req->base.buffer_handle = SVGA3D_INVALID_ID; + req->base.autogen_filter = SVGA3D_TEX_FILTER_NONE; + ret = vmw_gb_surface_define_ext_ioctl(dev, &arg, file_priv); + if (ret) { + drm_warn(dev, "Unable to create a dumb buffer\n"); + return ret; + } + + args->handle = arg.rep.buffer_handle; + args->size = arg.rep.buffer_size; + args->pitch = vmw_surface_calculate_pitch(desc, &drm_size); + + ret = vmw_user_resource_lookup_handle(dev_priv, tfile, arg.rep.handle, + user_surface_converter, + &res); + if (ret) { + drm_err(dev, "Created resource handle doesn't exist!\n"); + goto err; + } + + vbo = res->guest_memory_bo; + vbo->is_dumb = true; + vbo->dumb_surface = vmw_res_to_srf(res); + +err: + if (res) + vmw_resource_unreference(&res); + if (ret) + ttm_ref_object_base_unref(tfile, arg.rep.handle); + + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c index 3bfcf671fcd58303e1cbbfbfa86375afae4256ef..8651b788e98bc41f9dbba4d68f56e734defdf76e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c @@ -75,7 +75,7 @@ done: return ret; } -static int +static void compute_crc(struct drm_crtc *crtc, struct vmw_surface *surf, u32 *crc) @@ -101,8 +101,6 @@ compute_crc(struct drm_crtc *crtc, } vmw_bo_unmap(bo); - - return 0; } static void @@ -116,7 +114,6 @@ crc_generate_worker(struct work_struct *work) u64 frame_start, frame_end; u32 crc32 = 0; struct vmw_surface *surf = 0; - int ret; spin_lock_irq(&du->vkms.crc_state_lock); crc_pending = du->vkms.crc_pending; @@ -130,22 +127,24 @@ crc_generate_worker(struct work_struct *work) return; spin_lock_irq(&du->vkms.crc_state_lock); - surf = du->vkms.surface; + surf = vmw_surface_reference(du->vkms.surface); spin_unlock_irq(&du->vkms.crc_state_lock); - if (vmw_surface_sync(vmw, surf)) { - drm_warn(crtc->dev, "CRC worker wasn't able to sync the crc surface!\n"); - return; - } + if (surf) { + if (vmw_surface_sync(vmw, surf)) { + drm_warn( + crtc->dev, + "CRC worker wasn't able to sync the crc surface!\n"); + return; + } - ret = compute_crc(crtc, surf, &crc32); - if (ret) - return; + compute_crc(crtc, surf, &crc32); + vmw_surface_unreference(&surf); + } spin_lock_irq(&du->vkms.crc_state_lock); frame_start = du->vkms.frame_start; frame_end = du->vkms.frame_end; - crc_pending = du->vkms.crc_pending; du->vkms.frame_start = 0; du->vkms.frame_end = 0; du->vkms.crc_pending = false; @@ -164,7 +163,7 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) struct vmw_display_unit *du = container_of(timer, struct vmw_display_unit, vkms.timer); struct drm_crtc *crtc = &du->crtc; struct vmw_private *vmw = vmw_priv(crtc->dev); - struct vmw_surface *surf = NULL; + bool has_surface = false; u64 ret_overrun; bool locked, ret; @@ -179,10 +178,10 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) WARN_ON(!ret); if (!locked) return HRTIMER_RESTART; - surf = du->vkms.surface; + has_surface = du->vkms.surface != NULL; vmw_vkms_unlock(crtc); - if (du->vkms.crc_enabled && surf) { + if (du->vkms.crc_enabled && has_surface) { u64 frame = drm_crtc_accurate_vblank_count(crtc); spin_lock(&du->vkms.crc_state_lock); @@ -336,6 +335,8 @@ vmw_vkms_crtc_cleanup(struct drm_crtc *crtc) { struct vmw_display_unit *du = vmw_crtc_to_du(crtc); + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); WARN_ON(work_pending(&du->vkms.crc_generator_work)); hrtimer_cancel(&du->vkms.timer); } @@ -497,9 +498,12 @@ vmw_vkms_set_crc_surface(struct drm_crtc *crtc, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_private *vmw = vmw_priv(crtc->dev); - if (vmw->vkms_enabled) { + if (vmw->vkms_enabled && du->vkms.surface != surf) { WARN_ON(atomic_read(&du->vkms.atomic_lock) != VMW_VKMS_LOCK_MODESET); - du->vkms.surface = surf; + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); + if (surf) + du->vkms.surface = vmw_surface_reference(surf); } } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 03492fbcb8fbf1aef6ad874b7899100530e60ed6..f2f1d8ddb22138cb0e04df0635b01478b6a14bb2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -87,9 +87,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) spin_unlock(&xe->clients.lock); file->driver_priv = xef; + kref_init(&xef->refcount); + return 0; } +static void xe_file_destroy(struct kref *ref) +{ + struct xe_file *xef = container_of(ref, struct xe_file, refcount); + struct xe_device *xe = xef->xe; + + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + +/** + * xe_file_get() - Take a reference to the xe file object + * @xef: Pointer to the xe file + * + * Anyone with a pointer to xef must take a reference to the xe file + * object using this call. + * + * Return: xe file pointer + */ +struct xe_file *xe_file_get(struct xe_file *xef) +{ + kref_get(&xef->refcount); + return xef; +} + +/** + * xe_file_put() - Drop a reference to the xe file object + * @xef: Pointer to the xe file + * + * Used to drop reference to the xef object + */ +void xe_file_put(struct xe_file *xef) +{ + kref_put(&xef->refcount, xe_file_destroy); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -98,6 +144,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) struct xe_exec_queue *q; unsigned long idx; + xe_pm_runtime_get(xe); + /* * No need for exec_queue.lock here as there is no contention for it * when FD is closing as IOCTLs presumably can't be modifying the @@ -108,21 +156,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - xa_destroy(&xef->exec_queue.xa); - mutex_destroy(&xef->exec_queue.lock); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xa_destroy(&xef->vm.xa); - mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); + xe_file_put(xef); - xe_drm_client_put(xef->client); - kfree(xef); + xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { @@ -854,6 +895,13 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } +static void xe_device_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + xe_pm_runtime_put(xe); +} + /** * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance @@ -870,11 +918,21 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) */ void xe_device_declare_wedged(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + if (xe->wedged.mode == 0) { drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n"); return; } + if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { + drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); + return; + } + + xe_pm_runtime_get_noresume(xe); + if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; drm_err(&xe->drm, @@ -883,4 +941,7 @@ void xe_device_declare_wedged(struct xe_device *xe) "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); } + + for_each_gt(gt, xe, id) + xe_gt_declare_wedged(gt); } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bb07f5669dbb3514925adc29f1bc18fbd95ba345..b3952718b3c1cb165521960d84fda91d3ce1be9e 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -170,4 +170,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) void xe_device_declare_wedged(struct xe_device *xe); +struct xe_file *xe_file_get(struct xe_file *xef); +void xe_file_put(struct xe_file *xef); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744ac604bb4e50a07fb7ee31c40dbbe..cbc582bcc90a5fb1ee77e3cbd3e02014581dd7b4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -566,6 +566,9 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + + /** @refcount: ref count of this xe file */ + struct kref refcount; }; #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6a26923fa10e0f0e6864d14d3e86992e0a40f75a..7ddd59908334c54ac68ba32048db966871fc5cdb 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { + xa_for_each(&xef->exec_queue.xa, i, q) xe_exec_queue_update_run_ticks(q); - xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks; - q->old_run_ticks = q->run_ticks; - } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 2d72cdec3a0b394f3b65dabf1c818fe6dd2857cc..f36980aa26e69a8bf439f4b69f3b5b98b78ff40f 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -118,7 +118,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0, num_ufence = 0; + u32 i, num_syncs, num_ufence = 0; struct xe_sched_job *job; struct xe_vm *vm; bool write_locked, skip_retry = false; @@ -156,15 +156,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) vm = q->vm; - for (i = 0; i < args->num_syncs; i++) { - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], - &syncs_user[i], SYNC_PARSE_FLAG_EXEC | + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; - if (xe_sync_is_ufence(&syncs[i])) + if (xe_sync_is_ufence(&syncs[num_syncs])) num_ufence++; } @@ -325,8 +325,8 @@ err_unlock_list: if (err == -EAGAIN && !skip_retry) goto retry; err_syncs: - for (i = 0; i < num_syncs; i++) - xe_sync_entry_cleanup(&syncs[i]); + while (num_syncs--) + xe_sync_entry_cleanup(&syncs[num_syncs]); kfree(syncs); err_exec_queue: xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ba37835849b016975b0796937154040d76fde34..a39384bb9553ffbc2e4e9fb2c31b1b7c7df89c14 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) { if (q->vm) xe_vm_put(q->vm); + + if (q->xef) + xe_file_put(q->xef); + kfree(q); } @@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; args->exec_queue_id = id; + q->xef = xe_file_get(xef); return 0; @@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { + struct xe_file *xef; struct xe_lrc *lrc; u32 old_ts, new_ts; @@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) if (!q->vm || !q->vm->xef) return; + xef = q->vm->xef; + /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - q->run_ticks += (new_ts - old_ts) * q->width; + xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } void xe_exec_queue_kill(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 201588ec33c35955ba6223a960d94794653b6a8d..a35ce24c97982c880aec69b6dffe3151b3e8ef93 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -38,6 +38,9 @@ enum xe_exec_queue_priority { * a kernel object. */ struct xe_exec_queue { + /** @xef: Back pointer to xe file if this is user created exec queue */ + struct xe_file *xef; + /** @gt: graphics tile this exec queue can submit to */ struct xe_gt *gt; /** @@ -139,10 +142,6 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; - /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */ - u64 old_run_ticks; - /** @run_ticks: hw engine class run time in ticks for this exec queue */ - u64 run_ticks; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0ba2e2d0289b2391596045b6e2da24e7d7f9cca5..31b2e64c70c6abf9bc489eaf41d9f30b997ab766 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -904,3 +904,18 @@ struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) return NULL; } + +/** + * xe_gt_declare_wedged() - Declare GT wedged + * @gt: the GT object + * + * Wedge the GT which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_gt_declare_wedged(struct xe_gt *gt) +{ + xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); + + xe_uc_declare_wedged(>->uc); + xe_gt_tlb_invalidation_reset(gt); +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 1123fdfc4ebc17157fc00ee05d9cd57b49d57b3b..8b1a5027dcf271953e96e97e6fe6657535b6b8b7 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -37,6 +37,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); +void xe_gt_declare_wedged(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index db6c213da847d505c2cf6e1ef8d8da30475a96b7..b6f0a7299c030915cc9f556fa3c6a22705c369f7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1543,6 +1543,7 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); + fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); @@ -1926,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1934,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 41e46a00c01e89e28670f9d0518e10e5637fc29d..8892d6c2291ebb95ef95faabcbeafd9a36fbd0d3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key), + return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), vf_runtime_reg_cmp); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d9359976ab8bb6cf6ef25035663036a2fcb37c19..481d83d07367d23df42d7fd97fbae65617bee94e 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,10 +13,13 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + /* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue @@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + xe_gt_tlb_invalidation_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(xe, fence); +} static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); - list_del(&fence->link); fence->base.error = -ETIME; - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, @@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; - struct xe_guc *guc = >->uc.guc; int pending_seqno; /* @@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) else pending_seqno = gt->tlb_invalidation.seqno - 1; WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) @@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, int seqno; int ret; + xe_gt_assert(gt, fence); + /* * XXX: The seqno algorithm relies on TLB invalidation being processed * in order which they currently are, if that changes the algorithm will @@ -173,10 +177,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; - if (fence) { - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - } + fence->seqno = seqno; + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); @@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = seqno; } mutex_unlock(&guc->ct.lock); @@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * caller can use the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: 0 on success, negative error code on error */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; - return send_tlb_invalidation(>->uc.guc, NULL, action, + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); } @@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (xe_guc_ct_enabled(>->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - if (seqno <= 0) - return seqno; + struct xe_gt_tlb_invalidation_fence fence; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + ret = xe_gt_tlb_invalidation_guc(gt, &fence); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence); + return ret; + } - xe_gt_tlb_invalidation_wait(gt, seqno); + xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { if (IS_SRIOV_VF(xe)) return 0; @@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL + * completion * @start: start address * @end: end address * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; + xe_gt_assert(gt, fence); + /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(xe, fence); - + __invalidation_fence_signal(xe, fence); return 0; } @@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, * @vma: VMA to invalidate * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -400,43 +403,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_vma_vm(vma)->usm.asid); } -/** - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete - * @gt: graphics tile - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation - * - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. - * - * Return: 0 on success, -ETIME on TLB invalidation timeout - */ -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_guc *guc = >->uc.guc; - int ret; - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) - return 0; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - tlb_timeout_jiffies(gt)); - if (!ret) { - struct drm_printer p = xe_gt_err_printer(gt); - - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); - xe_guc_ct_print(&guc->ct, &p, true); - return -ETIME; - } - - return 0; -} - /** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc @@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } - /* - * wake_up_all() and wait_event_timeout() already have the correct - * barriers. - */ WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +/** + * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence + * @gt: GT + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini + * must be called if fence is not signaled. + */ +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&fence->base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(>->tlb_invalidation.lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->gt = gt; +} + +/** + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence + * @fence: TLB invalidation fence to finalize + * + * Drop PM ref which fence took durinig init. + */ +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + xe_pm_runtime_put(gt_to_xe(fence->gt)); +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index bf3bebd9f985ba6b04fe73d67611e295de665c65..a84065fa324c766ad5c81889333bb738c409684b 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack); +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); + +static inline void +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index 934c828efe31cb728d7e751555e9525233f30ae6..de6e825e0851e5097443b0ebec0cbddaa75cb3d6 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -8,6 +8,8 @@ #include +struct xe_gt; + /** * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence * @@ -17,6 +19,8 @@ struct xe_gt_tlb_invalidation_fence { /** @base: dma fence base */ struct dma_fence base; + /** @gt: GT which fence belong to */ + struct xe_gt *gt; /** @link: link into list of pending tlb fences */ struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index eb655cee19f7e2a38e3d4173fb4fc74ff276a52b..de0fe9e65746e898c1fbf7eeeb3501154edb5fbe 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1178,3 +1178,19 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_guc_ct_print(&guc->ct, p, false); xe_guc_submit_print(guc, p); } + +/** + * xe_guc_declare_wedged() - Declare GuC wedged + * @guc: the GuC object + * + * Wedge the GuC which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_guc_declare_wedged(struct xe_guc *guc) +{ + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + + xe_guc_reset_prepare(guc); + xe_guc_ct_stop(&guc->ct); + xe_guc_submit_wedge(guc); +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index af59c95457539a20ae77278f6fec3903ec45a38d..e0bbf98f849ddde6768d6c7e70197327b72d82bd 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -37,6 +37,7 @@ void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); +void xe_guc_declare_wedged(struct xe_guc *guc); static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d2e937da1d83d5f8606277c1e0cd1a74d0fe7e6..64afc90ad2c51942069efcd5ac078962a11bcce3 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED); + if (ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; ct->state = state; @@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) || + (g2h_len && num_g2h)); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); + if (!ct->g2h_outstanding) + xe_pm_runtime_get_noresume(ct_to_xe(ct)); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; } @@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; - --ct->g2h_outstanding; + if (!--ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); } static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 373447758a60253dde0c890802a2dd29379f8b91..6398629e6b4ecdc9d3f5df700e097aa708fb6d46 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -861,29 +861,27 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } -static bool guc_submit_hint_wedged(struct xe_guc *guc) +/** + * xe_guc_submit_wedge() - Wedge GuC submission + * @guc: the GuC object + * + * Save exec queue's registered with GuC state by taking a ref to each queue. + * Register a DRMM handler to drop refs upon driver unload. + */ +void xe_guc_submit_wedge(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; unsigned long index; int err; - if (xe->wedged.mode != 2) - return false; - - if (xe_device_wedged(xe)) - return true; - - xe_device_declare_wedged(xe); - - xe_guc_submit_reset_prepare(guc); - xe_guc_ct_stop(&guc->ct); + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); - return true; /* Device is wedged anyway */ + return; } mutex_lock(&guc->submission_state.lock); @@ -891,6 +889,19 @@ static bool guc_submit_hint_wedged(struct xe_guc *guc) if (xe_exec_queue_get_unless_zero(q)) set_exec_queue_wedged(q); mutex_unlock(&guc->submission_state.lock); +} + +static bool guc_submit_hint_wedged(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + + if (xe->wedged.mode != 2) + return false; + + if (xe_device_wedged(xe)) + return true; + + xe_device_declare_wedged(xe); return true; } @@ -1382,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1471,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; msg->private_data = q; @@ -1677,7 +1692,8 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) void xe_guc_submit_reset_wait(struct xe_guc *guc) { - wait_event(guc->ct.wq, !guc_read_stopped(guc)); + wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || + !guc_read_stopped(guc)); } void xe_guc_submit_stop(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 4ad5f4c1b08428ac7ac21cf90292e8d93bb3092a..bdf8c9f3d24a2292c88872b382eec4e0e148b521 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -18,6 +18,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); +void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025055f14941eca518fff9f9c87525b..832ea81faeee509811cd0e312045715ba048136f 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95eb1e31f5bfd41a772e8d9662652c1..58121821f0814a35b2947b0417ebdde4b62742c8 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ade9e7a3a0adb4a480a46cbdafbafd5e32990927..31a751a5de3f1f13d956973863e83a8e56058050 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1115,23 +1115,6 @@ struct invalidation_fence { u32 asid; }; -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - static void invalidation_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - - INIT_LIST_HEAD(&ifence->base.link); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; ifence->start = start; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 02e28274282f9330b9d27e87d3bde55d01fafc75..5efe83cc82ab5e6eea2668381e0accddf1e343d4 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -231,7 +231,7 @@ static void rtp_mark_active(struct xe_device *xe, if (first == last) bitmap_set(ctx->active_entries, first, 1); else - bitmap_set(ctx->active_entries, first, last - first + 2); + bitmap_set(ctx->active_entries, first, last - first + 1); } /** diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2883d9aca404e658d6bf182bc2489ae3f9fc8db5..e8d31e0108601981a1ba70434f97e7d80945a342 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, u64 value) { struct xe_user_fence *ufence; + u64 __user *ptr = u64_to_user_ptr(addr); + + if (!access_ok(ptr, sizeof(ptr))) + return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) - return NULL; + return ERR_PTR(-ENOMEM); ufence->xe = xe; kref_init(&ufence->refcount); - ufence->addr = u64_to_user_ptr(addr); + ufence->addr = ptr; ufence->value = value; ufence->mm = current->mm; mmgrab(ufence->mm); @@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_DBG(xe, !sync->ufence)) - return -ENOMEM; + if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) + return PTR_ERR(sync->ufence); } break; @@ -263,7 +267,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c86250d3d23106888c215e034f617..423b261ea7430533ba3986df29fba3c7d54dc6f0 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0f240534fb725d1f7441cfbfcab701295eae7ad6..0d073a9987c2e8732e120049832ddba3b80caf9a 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -300,3 +300,17 @@ void xe_uc_remove(struct xe_uc *uc) { xe_gsc_remove(&uc->gsc); } + +/** + * xe_uc_declare_wedged() - Declare UC wedged + * @uc: the UC object + * + * Wedge the UC which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_uc_declare_wedged(struct xe_uc *uc) +{ + xe_gt_assert(uc_to_gt(uc), uc_to_xe(uc)->wedged.mode); + + xe_guc_declare_wedged(&uc->guc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 11856f24e6f9ab74d9e6cdf97dcc606e319e9524..506517c1133397f746d53284e3f45a72fd4935a8 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -21,5 +21,6 @@ int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_remove(struct xe_uc *uc); +void xe_uc_declare_wedged(struct xe_uc *uc); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5b166fa03684e24e05e0309976ee2d378af5dff2..c7561a56abaf203a4b5bda80910e48633bcfbd00 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + + if (vm->xef) + xe_file_put(vm->xef); + kfree(vm); } @@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; - vm->xef = xef; + vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) @@ -3337,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; u32 tile_needs_invalidate = 0; - int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; - int ret; + int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); @@ -3365,29 +3369,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (xe_pt_zap_ptes(tile, vma)) { - tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[id], true); + /* * FIXME: We potentially need to invalidate multiple * GTs within the tile */ - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); - if (seqno[id] < 0) - return seqno[id]; - } - } + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, + &fence[id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[id]); + goto wait; + } - for_each_tile(tile, xe, id) { - if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); - if (ret < 0) - return ret; + tile_needs_invalidate |= BIT(id); } } +wait: + for_each_tile(tile, xe, id) + if (tile_needs_invalidate & BIT(id)) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + vma->tile_invalidated = vma->tile_mask; - return 0; + return ret; } struct xe_vm_snapshot { diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index bdb578e0899f55bb1b632cd17ef10200b52e3dd9..4b59687ff5d821ebb3791e1288592b6c9f6f159d 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -288,12 +288,22 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) mp2_ops->start(privdata, info); cl_data->sensor_sts[i] = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); + + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + cl_data->is_any_sensor_enabled = true; + } + + if (!cl_data->is_any_sensor_enabled || + (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { + dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", + cl_data->is_any_sensor_enabled); + rc = -EOPNOTSUPP; + goto cleanup; } for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->cur_hid_dev = i; if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { - cl_data->is_any_sensor_enabled = true; rc = amdtp_hid_probe(i, cl_data); if (rc) goto cleanup; @@ -305,12 +315,6 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_sts[i]); } - if (!cl_data->is_any_sensor_enabled || - (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { - dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); - rc = -EOPNOTSUPP; - goto cleanup; - } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b523370684551c349872252403467f1d0f67a..81f3024b7b1b51207cb75534afa8cf87cda5530d 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ err_hid_data: void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } diff --git a/drivers/hid/bpf/Kconfig b/drivers/hid/bpf/Kconfig index 83214bae67687ab497cfd7bc3116c528dcd6a2f6..d65482e02a6c27bf4bc1ab41bf66df2ba0a98041 100644 --- a/drivers/hid/bpf/Kconfig +++ b/drivers/hid/bpf/Kconfig @@ -3,7 +3,7 @@ menu "HID-BPF support" config HID_BPF bool "HID-BPF support" - depends on BPF + depends on BPF_JIT depends on BPF_SYSCALL depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS help diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c index f59cce6e437f7720f028d988d79b23a5810dfedd..cd696c59ba0f4faec12234cb3154ca09dced5384 100644 --- a/drivers/hid/bpf/hid_bpf_struct_ops.c +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -183,6 +183,10 @@ static int hid_bpf_reg(void *kdata, struct bpf_link *link) struct hid_device *hdev; int count, err = 0; + /* prevent multiple attach of the same struct_ops */ + if (ops->hdev) + return -EINVAL; + hdev = hid_get_device(ops->hid_id); if (IS_ERR(hdev)) return PTR_ERR(hdev); @@ -248,6 +252,7 @@ static void hid_bpf_unreg(void *kdata, struct bpf_link *link) list_del_rcu(&ops->list); synchronize_srcu(&hdev->bpf.srcu); + ops->hdev = NULL; reconnect = hdev->bpf.rdesc_ops == ops; if (reconnect) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 37e6d25593c211d202ccb8f6996b1cd2818ee69e..a282388b7aa5c1df7d05e53f5712cfb5f6ef6958 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1248,6 +1248,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b511c3ff18388c2bdf5d95e3f4637..0fa785f52707ac22a7e39a22f5a9e643ec734465 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 72d56ee7ce1b98c6e5a23b67eb83cd9818db8be3..781c5aa298598a19bf79b22a0e6af0560053846a 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 @@ -520,6 +521,8 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 +#define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 #define USB_VENDOR_ID_GOODTOUCH 0x1aad diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 56fc78841f245a9086b8dc73610cb83642b4b7b7..99812c0f830b5e432a4f443c09726680e63e39bb 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1441,6 +1441,30 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, return 0; } +static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *size) +{ + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( + &hdev->dev, + "GT7868Q report descriptor fixup is applied.\n"); + } else { + dev_info( + &hdev->dev, + "The byte is not expected for fixing the report descriptor. \ +It's possible that the touchpad firmware is not suitable for applying the fix. \ +got: %x\n", + rdesc[607]); + } + } + + return rdesc; +} + static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); @@ -2035,6 +2059,14 @@ static const struct hid_device_id mt_devices[] = { MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, + /* Goodix GT7868Q devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, @@ -2270,6 +2302,7 @@ static struct hid_driver mt_driver = { .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, + .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index a44367aef62164f1e76eeecd4ee22d2ad61c6a26..2541fa2e0fa3b15aff4f393d314f740cdb597ea4 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -692,78 +692,28 @@ static bool wacom_is_art_pen(int tool_id) static int wacom_intuos_get_tool_type(int tool_id) { - int tool_type = BTN_TOOL_PEN; - - if (wacom_is_art_pen(tool_id)) - return tool_type; - switch (tool_id) { case 0x812: /* Inking pen */ case 0x801: /* Intuos3 Inking pen */ case 0x12802: /* Intuos4/5 Inking Pen */ case 0x012: - tool_type = BTN_TOOL_PENCIL; - break; - - case 0x822: /* Pen */ - case 0x842: - case 0x852: - case 0x823: /* Intuos3 Grip Pen */ - case 0x813: /* Intuos3 Classic Pen */ - case 0x802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x8e2: /* IntuosHT2 pen */ - case 0x022: - case 0x200: /* Pro Pen 3 */ - case 0x04200: /* Pro Pen 3 */ - case 0x10842: /* MobileStudio Pro Pro Pen slim */ - case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */ - case 0x16802: /* Cintiq 13HD Pro Pen */ - case 0x18802: /* DTH2242 Pen */ - case 0x10802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x80842: /* Intuos Pro and Cintiq Pro 3D Pen */ - tool_type = BTN_TOOL_PEN; - break; + return BTN_TOOL_PENCIL; case 0x832: /* Stroke pen */ case 0x032: - tool_type = BTN_TOOL_BRUSH; - break; + return BTN_TOOL_BRUSH; case 0x007: /* Mouse 4D and 2D */ case 0x09c: case 0x094: case 0x017: /* Intuos3 2D Mouse */ case 0x806: /* Intuos4 Mouse */ - tool_type = BTN_TOOL_MOUSE; - break; + return BTN_TOOL_MOUSE; case 0x096: /* Lens cursor */ case 0x097: /* Intuos3 Lens cursor */ case 0x006: /* Intuos4 Lens cursor */ - tool_type = BTN_TOOL_LENS; - break; - - case 0x82a: /* Eraser */ - case 0x84a: - case 0x85a: - case 0x91a: - case 0xd1a: - case 0x0fa: - case 0x82b: /* Intuos3 Grip Pen Eraser */ - case 0x81b: /* Intuos3 Classic Pen Eraser */ - case 0x91b: /* Intuos3 Airbrush Eraser */ - case 0x80c: /* Intuos4/5 13HD/24HD Marker Pen Eraser */ - case 0x80a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - case 0x90a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1480a: /* Intuos4/5 13HD/24HD Classic Pen Eraser */ - case 0x1090a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1080c: /* Intuos4/5 13HD/24HD Art Pen Eraser */ - case 0x1084a: /* MobileStudio Pro Pro Pen slim Eraser */ - case 0x1680a: /* Cintiq 13HD Pro Pen Eraser */ - case 0x1880a: /* DTH2242 Eraser */ - case 0x1080a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - tool_type = BTN_TOOL_RUBBER; - break; + return BTN_TOOL_LENS; case 0xd12: case 0x912: @@ -771,10 +721,13 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x913: /* Intuos3 Airbrush */ case 0x902: /* Intuos4/5 13HD/24HD Airbrush */ case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */ - tool_type = BTN_TOOL_AIRBRUSH; - break; + return BTN_TOOL_AIRBRUSH; + + default: + if (tool_id & 0x0008) + return BTN_TOOL_RUBBER; + return BTN_TOOL_PEN; } - return tool_type; } static void wacom_exit_report(struct wacom_wac *wacom) @@ -1925,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; - int resolution = hidinput_calc_abs_res(field, resolution_code); + int resolution; if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; } + resolution = hidinput_calc_abs_res(field, resolution_code); + if (equivalent_usage == HID_GD_X) { fmin += features->offset_left; fmax -= features->offset_right; diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index bc186c61a2c0a575118f1c97bf28f39881da5c31..382a2bb9168a50ddde0fe2ecdd72d2d635a4dfce 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -22,23 +22,23 @@ #include /* Indexes for the sysfs hooks */ - -#define INPUT 0 -#define MIN 1 -#define MAX 2 -#define CONTROL 3 -#define OFFSET 3 -#define AUTOMIN 4 -#define THERM 5 -#define HYSTERSIS 6 - +enum adt_sysfs_id { + INPUT = 0, + MIN = 1, + MAX = 2, + CONTROL = 3, + OFFSET = 3, // Dup + AUTOMIN = 4, + THERM = 5, + HYSTERSIS = 6, /* * These are unique identifiers for the sysfs functions - unlike the * numbers above, these are not also indexes into an array */ + ALARM = 9, + FAULT = 10, +}; -#define ALARM 9 -#define FAULT 10 /* 7475 Common Registers */ diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 0a8b95ce35f79336251675d7f7bb491e58765b9d..06e836e3e877339233d5c514c63e8b66c09000ba 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -986,12 +986,17 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = clk_prepare_enable(gi2c->core_clk); - if (ret) + if (ret) { + geni_icc_disable(&gi2c->se); return ret; + } ret = geni_se_resources_on(&gi2c->se); - if (ret) + if (ret) { + clk_disable_unprepare(gi2c->core_clk); + geni_icc_disable(&gi2c->se); return ret; + } enable_irq(gi2c->irq); gi2c->suspended = 0; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85b31edc558dff21a54120ea2f8b01e3dd8a3f00..1df5b42041427c87753cd10739805412890cf726 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) * domain. * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't - * be used for atomic transfers. + * be used for atomic transfers. ACPI device is not IRQ safe also. */ - if (!IS_VI(i2c_dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index 4e03b75f9ad7724d2306ee4a85f5acc6e3f9769f..4c550306f3ec5955f553251724a7d3ebe3d8547c 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -18,7 +18,7 @@ enum testunit_cmds { TU_CMD_READ_BYTES = 1, /* save 0 for ABORT, RESET or similar */ - TU_CMD_HOST_NOTIFY, + TU_CMD_SMBUS_HOST_NOTIFY, TU_CMD_SMBUS_BLOCK_PROC_CALL, TU_NUM_CMDS }; @@ -60,7 +60,7 @@ static void i2c_slave_testunit_work(struct work_struct *work) msg.len = tu->regs[TU_REG_DATAH]; break; - case TU_CMD_HOST_NOTIFY: + case TU_CMD_SMBUS_HOST_NOTIFY: msg.addr = 0x08; msg.flags = 0; msg.len = 3; diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 7e4203df83edb543c7a416ad77106b4137e9c6e8..8256f7aed0cf860ae548a52899380d2df0edab5a 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -34,6 +34,7 @@ static int smbus_do_alert(struct device *dev, void *addrp) struct i2c_client *client = i2c_verify_client(dev); struct alert_data *data = addrp; struct i2c_driver *driver; + int ret; if (!client || client->addr != data->addr) return 0; @@ -47,16 +48,47 @@ static int smbus_do_alert(struct device *dev, void *addrp) device_lock(dev); if (client->dev.driver) { driver = to_i2c_driver(client->dev.driver); - if (driver->alert) + if (driver->alert) { + /* Stop iterating after we find the device */ driver->alert(client, data->type, data->data); - else + ret = -EBUSY; + } else { dev_warn(&client->dev, "no driver alert()!\n"); - } else + ret = -EOPNOTSUPP; + } + } else { dev_dbg(&client->dev, "alert with no driver\n"); + ret = -ENODEV; + } + device_unlock(dev); + + return ret; +} + +/* Same as above, but call back all drivers with alert handler */ + +static int smbus_do_alert_force(struct device *dev, void *addrp) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct alert_data *data = addrp; + struct i2c_driver *driver; + + if (!client || (client->flags & I2C_CLIENT_TEN)) + return 0; + + /* + * Drivers should either disable alerts, or provide at least + * a minimal handler. Lock so the driver won't change. + */ + device_lock(dev); + if (client->dev.driver) { + driver = to_i2c_driver(client->dev.driver); + if (driver->alert) + driver->alert(client, data->type, data->data); + } device_unlock(dev); - /* Stop iterating after we find the device */ - return -EBUSY; + return 0; } /* @@ -67,6 +99,7 @@ static irqreturn_t smbus_alert(int irq, void *d) { struct i2c_smbus_alert *alert = d; struct i2c_client *ara; + unsigned short prev_addr = I2C_CLIENT_END; /* Not a valid address */ ara = alert->ara; @@ -94,8 +127,25 @@ static irqreturn_t smbus_alert(int irq, void *d) data.addr, data.data); /* Notify driver for the device which issued the alert */ - device_for_each_child(&ara->adapter->dev, &data, - smbus_do_alert); + status = device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert); + /* + * If we read the same address more than once, and the alert + * was not handled by a driver, it won't do any good to repeat + * the loop because it will never terminate. Try again, this + * time calling the alert handlers of all devices connected to + * the bus, and abort the loop afterwards. If this helps, we + * are all set. If it doesn't, there is nothing else we can do, + * so we might as well abort the loop. + * Note: This assumes that a driver with alert handler handles + * the alert properly and clears it if necessary. + */ + if (data.addr == prev_addr && status != -EBUSY) { + device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert_force); + break; + } + prev_addr = data.addr; } return IRQ_HANDLED; diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 4d99a3524171b9908315bb243fee9c1a3666eb8c..433f6088b7cec8c77288ee24dbee8b18338aa1eb 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -10,8 +10,6 @@ #include -extern const struct bus_type i3c_bus_type; - void i3c_bus_normaluse_lock(struct i3c_bus *bus); void i3c_bus_normaluse_unlock(struct i3c_bus *bus); diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 00a3e9d01547b96f695613231b7ff5c389df9694..7028f03c2c42e24b25f1339e56df2ad13abf04f2 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -342,6 +342,7 @@ const struct bus_type i3c_bus_type = { .probe = i3c_device_probe, .remove = i3c_device_remove, }; +EXPORT_SYMBOL_GPL(i3c_bus_type); static enum i3c_addr_slot_status i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr) diff --git a/drivers/i3c/master/ast2600-i3c-master.c b/drivers/i3c/master/ast2600-i3c-master.c index 01a47d3dd499f56d9a71e69f502c16ce5145189f..84942dbb6f8028cc5d02717912e6174fa0228afe 100644 --- a/drivers/i3c/master/ast2600-i3c-master.c +++ b/drivers/i3c/master/ast2600-i3c-master.c @@ -156,7 +156,6 @@ static int ast2600_i3c_probe(struct platform_device *pdev) i3c->sda_pullup); i3c->dw.platform_ops = &ast2600_i3c_ops; - i3c->dw.ibi_capable = true; return dw_i3c_common_probe(&i3c->dw, pdev); } diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 0ec00e644bd47142b29290f03a84d8657f1dd513..8d694672c1104f8ece5428dcd3711b6b4a02ba54 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include #include #include @@ -217,7 +219,7 @@ #define I3C_BUS_THIGH_MAX_NS 41 #define XFER_TIMEOUT (msecs_to_jiffies(1000)) - +#define RPM_AUTOSUSPEND_TIMEOUT 1000 /* ms */ struct dw_i3c_cmd { u32 cmd_lo; u32 cmd_hi; @@ -300,7 +302,14 @@ static void dw_i3c_master_disable(struct dw_i3c_master *master) static void dw_i3c_master_enable(struct dw_i3c_master *master) { - writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_ENABLE, + u32 dev_ctrl; + + dev_ctrl = readl(master->regs + DEVICE_CTRL); + /* For now don't support Hot-Join */ + dev_ctrl |= DEV_CTRL_HOT_JOIN_NACK; + if (master->i2c_slv_prsnt) + dev_ctrl |= DEV_CTRL_I2C_SLAVE_PRESENT; + writel(dev_ctrl | DEV_CTRL_ENABLE, master->regs + DEVICE_CTRL); } @@ -521,6 +530,32 @@ static void dw_i3c_master_end_xfer_locked(struct dw_i3c_master *master, u32 isr) dw_i3c_master_start_xfer_locked(master); } +static void dw_i3c_master_set_intr_regs(struct dw_i3c_master *master) +{ + u32 thld_ctrl; + + thld_ctrl = readl(master->regs + QUEUE_THLD_CTRL); + thld_ctrl &= ~(QUEUE_THLD_CTRL_RESP_BUF_MASK | + QUEUE_THLD_CTRL_IBI_STAT_MASK | + QUEUE_THLD_CTRL_IBI_DATA_MASK); + thld_ctrl |= QUEUE_THLD_CTRL_IBI_STAT(1) | + QUEUE_THLD_CTRL_IBI_DATA(31); + writel(thld_ctrl, master->regs + QUEUE_THLD_CTRL); + + thld_ctrl = readl(master->regs + DATA_BUFFER_THLD_CTRL); + thld_ctrl &= ~DATA_BUFFER_THLD_CTRL_RX_BUF; + writel(thld_ctrl, master->regs + DATA_BUFFER_THLD_CTRL); + + writel(INTR_ALL, master->regs + INTR_STATUS); + writel(INTR_MASTER_MASK, master->regs + INTR_STATUS_EN); + writel(INTR_MASTER_MASK, master->regs + INTR_SIGNAL_EN); + + master->sir_rej_mask = IBI_REQ_REJECT_ALL; + writel(master->sir_rej_mask, master->regs + IBI_SIR_REQ_REJECT); + + writel(IBI_REQ_REJECT_ALL, master->regs + IBI_MR_REQ_REJECT); +} + static int dw_i3c_clk_cfg(struct dw_i3c_master *master) { unsigned long core_rate, core_period; @@ -543,18 +578,22 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master) scl_timing = SCL_I3C_TIMING_HCNT(hcnt) | SCL_I3C_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I3C_PP_TIMING); + master->i3c_pp_timing = scl_timing; /* * In pure i3c mode, MST_FREE represents tCAS. In shared mode, this * will be set up by dw_i2c_clk_cfg as tLOW. */ - if (master->base.bus.mode == I3C_BUS_MODE_PURE) + if (master->base.bus.mode == I3C_BUS_MODE_PURE) { writel(BUS_I3C_MST_FREE(lcnt), master->regs + BUS_FREE_TIMING); + master->bus_free_timing = BUS_I3C_MST_FREE(lcnt); + } lcnt = max_t(u8, DIV_ROUND_UP(I3C_BUS_TLOW_OD_MIN_NS, core_period), lcnt); scl_timing = SCL_I3C_TIMING_HCNT(hcnt) | SCL_I3C_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I3C_OD_TIMING); + master->i3c_od_timing = scl_timing; lcnt = DIV_ROUND_UP(core_rate, I3C_BUS_SDR1_SCL_RATE) - hcnt; scl_timing = SCL_EXT_LCNT_1(lcnt); @@ -565,6 +604,7 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master) lcnt = DIV_ROUND_UP(core_rate, I3C_BUS_SDR4_SCL_RATE) - hcnt; scl_timing |= SCL_EXT_LCNT_4(lcnt); writel(scl_timing, master->regs + SCL_EXT_LCNT_TIMING); + master->ext_lcnt_timing = scl_timing; return 0; } @@ -586,16 +626,21 @@ static int dw_i2c_clk_cfg(struct dw_i3c_master *master) scl_timing = SCL_I2C_FMP_TIMING_HCNT(hcnt) | SCL_I2C_FMP_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FMP_TIMING); + master->i2c_fmp_timing = scl_timing; lcnt = DIV_ROUND_UP(I3C_BUS_I2C_FM_TLOW_MIN_NS, core_period); hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_SCL_RATE) - lcnt; scl_timing = SCL_I2C_FM_TIMING_HCNT(hcnt) | SCL_I2C_FM_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FM_TIMING); + master->i2c_fm_timing = scl_timing; writel(BUS_I3C_MST_FREE(lcnt), master->regs + BUS_FREE_TIMING); + master->bus_free_timing = BUS_I3C_MST_FREE(lcnt); + writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_I2C_SLAVE_PRESENT, master->regs + DEVICE_CTRL); + master->i2c_slv_prsnt = true; return 0; } @@ -605,69 +650,58 @@ static int dw_i3c_master_bus_init(struct i3c_master_controller *m) struct dw_i3c_master *master = to_dw_i3c_master(m); struct i3c_bus *bus = i3c_master_get_bus(m); struct i3c_device_info info = { }; - u32 thld_ctrl; int ret; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + ret = master->platform_ops->init(master); if (ret) - return ret; + goto rpm_out; switch (bus->mode) { case I3C_BUS_MODE_MIXED_FAST: case I3C_BUS_MODE_MIXED_LIMITED: ret = dw_i2c_clk_cfg(master); if (ret) - return ret; + goto rpm_out; fallthrough; case I3C_BUS_MODE_PURE: ret = dw_i3c_clk_cfg(master); if (ret) - return ret; + goto rpm_out; break; default: - return -EINVAL; + ret = -EINVAL; + goto rpm_out; } - thld_ctrl = readl(master->regs + QUEUE_THLD_CTRL); - thld_ctrl &= ~(QUEUE_THLD_CTRL_RESP_BUF_MASK | - QUEUE_THLD_CTRL_IBI_STAT_MASK | - QUEUE_THLD_CTRL_IBI_STAT_MASK); - thld_ctrl |= QUEUE_THLD_CTRL_IBI_STAT(1) | - QUEUE_THLD_CTRL_IBI_DATA(31); - writel(thld_ctrl, master->regs + QUEUE_THLD_CTRL); - - thld_ctrl = readl(master->regs + DATA_BUFFER_THLD_CTRL); - thld_ctrl &= ~DATA_BUFFER_THLD_CTRL_RX_BUF; - writel(thld_ctrl, master->regs + DATA_BUFFER_THLD_CTRL); - - writel(INTR_ALL, master->regs + INTR_STATUS); - writel(INTR_MASTER_MASK, master->regs + INTR_STATUS_EN); - writel(INTR_MASTER_MASK, master->regs + INTR_SIGNAL_EN); - ret = i3c_master_get_free_addr(m, 0); if (ret < 0) - return ret; + goto rpm_out; writel(DEV_ADDR_DYNAMIC_ADDR_VALID | DEV_ADDR_DYNAMIC(ret), master->regs + DEVICE_ADDR); - + master->dev_addr = ret; memset(&info, 0, sizeof(info)); info.dyn_addr = ret; ret = i3c_master_set_info(&master->base, &info); if (ret) - return ret; - - writel(IBI_REQ_REJECT_ALL, master->regs + IBI_SIR_REQ_REJECT); - writel(IBI_REQ_REJECT_ALL, master->regs + IBI_MR_REQ_REJECT); - - /* For now don't support Hot-Join */ - writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_HOT_JOIN_NACK, - master->regs + DEVICE_CTRL); + goto rpm_out; + dw_i3c_master_set_intr_regs(master); dw_i3c_master_enable(master); - return 0; +rpm_out: + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + return ret; } static void dw_i3c_master_bus_cleanup(struct i3c_master_controller *m) @@ -769,11 +803,21 @@ static int dw_i3c_master_send_ccc_cmd(struct i3c_master_controller *m, if (ccc->id == I3C_CCC_ENTDAA) return -EINVAL; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + if (ccc->rnw) ret = dw_i3c_ccc_get(master, ccc); else ret = dw_i3c_ccc_set(master, ccc); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -786,6 +830,14 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) u8 p, last_addr = 0; int ret, pos; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + olddevs = ~(master->free_pos); /* Prepare DAT before launching DAA. */ @@ -794,8 +846,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) continue; ret = i3c_master_get_free_addr(m, last_addr + 1); - if (ret < 0) - return -ENOSPC; + if (ret < 0) { + ret = -ENOSPC; + goto rpm_out; + } master->devs[pos].addr = ret; p = even_parity(ret); @@ -805,16 +859,21 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(ret), master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, pos)); + + ret = 0; } xfer = dw_i3c_master_alloc_xfer(master, 1); - if (!xfer) - return -ENOMEM; + if (!xfer) { + ret = -ENOMEM; + goto rpm_out; + } pos = dw_i3c_master_get_free_pos(master); if (pos < 0) { dw_i3c_master_free_xfer(xfer); - return pos; + ret = pos; + goto rpm_out; } cmd = &xfer->cmds[0]; cmd->cmd_hi = 0x1; @@ -839,7 +898,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) dw_i3c_master_free_xfer(xfer); - return 0; +rpm_out: + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + return ret; } static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, @@ -874,6 +936,14 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (!xfer) return -ENOMEM; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + for (i = 0; i < i3c_nxfers; i++) { struct dw_i3c_cmd *cmd = &xfer->cmds[i]; @@ -915,6 +985,8 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1025,6 +1097,14 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, if (!xfer) return -ENOMEM; + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } + for (i = 0; i < i2c_nxfers; i++) { struct dw_i3c_cmd *cmd = &xfer->cmds[i]; @@ -1055,6 +1135,8 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1075,6 +1157,7 @@ static int dw_i3c_master_attach_i2c_dev(struct i2c_dev_desc *dev) data->index = pos; master->devs[pos].addr = dev->addr; + master->devs[pos].is_i2c_addr = true; master->free_pos &= ~BIT(pos); i2c_dev_set_master_data(dev, data); @@ -1175,17 +1258,16 @@ static void dw_i3c_master_set_sir_enabled(struct dw_i3c_master *master, master->platform_ops->set_dat_ibi(master, dev, enable, ®); writel(reg, master->regs + dat_entry); - reg = readl(master->regs + IBI_SIR_REQ_REJECT); if (enable) { - global = reg == 0xffffffff; - reg &= ~BIT(idx); + global = (master->sir_rej_mask == IBI_REQ_REJECT_ALL); + master->sir_rej_mask &= ~BIT(idx); } else { bool hj_rejected = !!(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_HOT_JOIN_NACK); - reg |= BIT(idx); - global = (reg == 0xffffffff) && hj_rejected; + master->sir_rej_mask |= BIT(idx); + global = (master->sir_rej_mask == IBI_REQ_REJECT_ALL) && hj_rejected; } - writel(reg, master->regs + IBI_SIR_REQ_REJECT); + writel(master->sir_rej_mask, master->regs + IBI_SIR_REQ_REJECT); if (global) dw_i3c_master_enable_sir_signal(master, enable); @@ -1197,6 +1279,15 @@ static void dw_i3c_master_set_sir_enabled(struct dw_i3c_master *master, static int dw_i3c_master_enable_hotjoin(struct i3c_master_controller *m) { struct dw_i3c_master *master = to_dw_i3c_master(m); + int ret; + + ret = pm_runtime_resume_and_get(master->dev); + if (ret < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, ret); + return ret; + } dw_i3c_master_enable_sir_signal(master, true); writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_HOT_JOIN_NACK, @@ -1212,6 +1303,8 @@ static int dw_i3c_master_disable_hotjoin(struct i3c_master_controller *m) writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_HOT_JOIN_NACK, master->regs + DEVICE_CTRL); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return 0; } @@ -1222,12 +1315,23 @@ static int dw_i3c_master_enable_ibi(struct i3c_dev_desc *dev) struct dw_i3c_master *master = to_dw_i3c_master(m); int rc; + rc = pm_runtime_resume_and_get(master->dev); + if (rc < 0) { + dev_err(master->dev, + "<%s> cannot resume i3c bus master, err: %d\n", + __func__, rc); + return rc; + } + dw_i3c_master_set_sir_enabled(master, dev, data->index, true); rc = i3c_master_enec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); - if (rc) + if (rc) { dw_i3c_master_set_sir_enabled(master, dev, data->index, false); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); + } return rc; } @@ -1245,6 +1349,8 @@ static int dw_i3c_master_disable_ibi(struct i3c_dev_desc *dev) dw_i3c_master_set_sir_enabled(master, dev, data->index, false); + pm_runtime_mark_last_busy(master->dev); + pm_runtime_put_autosuspend(master->dev); return 0; } @@ -1403,21 +1509,6 @@ static const struct i3c_master_controller_ops dw_mipi_i3c_ops = { .attach_i2c_dev = dw_i3c_master_attach_i2c_dev, .detach_i2c_dev = dw_i3c_master_detach_i2c_dev, .i2c_xfers = dw_i3c_master_i2c_xfers, -}; - -static const struct i3c_master_controller_ops dw_mipi_i3c_ibi_ops = { - .bus_init = dw_i3c_master_bus_init, - .bus_cleanup = dw_i3c_master_bus_cleanup, - .attach_i3c_dev = dw_i3c_master_attach_i3c_dev, - .reattach_i3c_dev = dw_i3c_master_reattach_i3c_dev, - .detach_i3c_dev = dw_i3c_master_detach_i3c_dev, - .do_daa = dw_i3c_master_daa, - .supports_ccc_cmd = dw_i3c_master_supports_ccc_cmd, - .send_ccc_cmd = dw_i3c_master_send_ccc_cmd, - .priv_xfers = dw_i3c_master_priv_xfers, - .attach_i2c_dev = dw_i3c_master_attach_i2c_dev, - .detach_i2c_dev = dw_i3c_master_detach_i2c_dev, - .i2c_xfers = dw_i3c_master_i2c_xfers, .request_ibi = dw_i3c_master_request_ibi, .free_ibi = dw_i3c_master_free_ibi, .enable_ibi = dw_i3c_master_enable_ibi, @@ -1455,29 +1546,30 @@ static void dw_i3c_hj_work(struct work_struct *work) int dw_i3c_common_probe(struct dw_i3c_master *master, struct platform_device *pdev) { - const struct i3c_master_controller_ops *ops; int ret, irq; if (!master->platform_ops) master->platform_ops = &dw_i3c_platform_ops_default; + master->dev = &pdev->dev; + master->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(master->regs)) return PTR_ERR(master->regs); - master->core_clk = devm_clk_get(&pdev->dev, NULL); + master->core_clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(master->core_clk)) return PTR_ERR(master->core_clk); + master->pclk = devm_clk_get_optional_enabled(&pdev->dev, "pclk"); + if (IS_ERR(master->pclk)) + return PTR_ERR(master->pclk); + master->core_rst = devm_reset_control_get_optional_exclusive(&pdev->dev, "core_rst"); if (IS_ERR(master->core_rst)) return PTR_ERR(master->core_rst); - ret = clk_prepare_enable(master->core_clk); - if (ret) - goto err_disable_core_clk; - reset_control_deassert(master->core_rst); spin_lock_init(&master->xferqueue.lock); @@ -1493,6 +1585,11 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, platform_set_drvdata(pdev, master); + pm_runtime_set_autosuspend_delay(&pdev->dev, RPM_AUTOSUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + /* Information regarding the FIFOs/QUEUEs depth */ ret = readl(master->regs + QUEUE_STATUS_LEVEL); master->caps.cmdfifodepth = QUEUE_STATUS_LEVEL_CMD(ret); @@ -1505,23 +1602,22 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, master->maxdevs = ret >> 16; master->free_pos = GENMASK(master->maxdevs - 1, 0); - ops = &dw_mipi_i3c_ops; - if (master->ibi_capable) - ops = &dw_mipi_i3c_ibi_ops; - INIT_WORK(&master->hj_work, dw_i3c_hj_work); - ret = i3c_master_register(&master->base, &pdev->dev, ops, false); + ret = i3c_master_register(&master->base, &pdev->dev, + &dw_mipi_i3c_ops, false); if (ret) - goto err_assert_rst; + goto err_disable_pm; return 0; +err_disable_pm: + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + err_assert_rst: reset_control_assert(master->core_rst); -err_disable_core_clk: - clk_disable_unprepare(master->core_clk); - return ret; } EXPORT_SYMBOL_GPL(dw_i3c_common_probe); @@ -1530,9 +1626,9 @@ void dw_i3c_common_remove(struct dw_i3c_master *master) { i3c_master_unregister(&master->base); - reset_control_assert(master->core_rst); - - clk_disable_unprepare(master->core_clk); + pm_runtime_disable(master->dev); + pm_runtime_set_suspended(master->dev); + pm_runtime_dont_use_autosuspend(master->dev); } EXPORT_SYMBOL_GPL(dw_i3c_common_remove); @@ -1556,6 +1652,96 @@ static void dw_i3c_remove(struct platform_device *pdev) dw_i3c_common_remove(master); } +static void dw_i3c_master_restore_addrs(struct dw_i3c_master *master) +{ + u32 pos, reg_val; + + writel(DEV_ADDR_DYNAMIC_ADDR_VALID | DEV_ADDR_DYNAMIC(master->dev_addr), + master->regs + DEVICE_ADDR); + + for (pos = 0; pos < master->maxdevs; pos++) { + if (master->free_pos & BIT(pos)) + continue; + + if (master->devs[pos].is_i2c_addr) + reg_val = DEV_ADDR_TABLE_LEGACY_I2C_DEV | + DEV_ADDR_TABLE_STATIC_ADDR(master->devs[pos].addr); + else + reg_val = DEV_ADDR_TABLE_DYNAMIC_ADDR(master->devs[pos].addr); + + writel(reg_val, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, pos)); + } +} + +static void dw_i3c_master_restore_timing_regs(struct dw_i3c_master *master) +{ + writel(master->i3c_pp_timing, master->regs + SCL_I3C_PP_TIMING); + writel(master->bus_free_timing, master->regs + BUS_FREE_TIMING); + writel(master->i3c_od_timing, master->regs + SCL_I3C_OD_TIMING); + writel(master->ext_lcnt_timing, master->regs + SCL_EXT_LCNT_TIMING); + + if (master->i2c_slv_prsnt) { + writel(master->i2c_fmp_timing, master->regs + SCL_I2C_FMP_TIMING); + writel(master->i2c_fm_timing, master->regs + SCL_I2C_FM_TIMING); + } +} + +static int dw_i3c_master_enable_clks(struct dw_i3c_master *master) +{ + int ret = 0; + + ret = clk_prepare_enable(master->core_clk); + if (ret) + return ret; + + ret = clk_prepare_enable(master->pclk); + if (ret) { + clk_disable_unprepare(master->core_clk); + return ret; + } + + return 0; +} + +static inline void dw_i3c_master_disable_clks(struct dw_i3c_master *master) +{ + clk_disable_unprepare(master->pclk); + clk_disable_unprepare(master->core_clk); +} + +static int __maybe_unused dw_i3c_master_runtime_suspend(struct device *dev) +{ + struct dw_i3c_master *master = dev_get_drvdata(dev); + + dw_i3c_master_disable(master); + + reset_control_assert(master->core_rst); + dw_i3c_master_disable_clks(master); + pinctrl_pm_select_sleep_state(dev); + return 0; +} + +static int __maybe_unused dw_i3c_master_runtime_resume(struct device *dev) +{ + struct dw_i3c_master *master = dev_get_drvdata(dev); + + pinctrl_pm_select_default_state(dev); + dw_i3c_master_enable_clks(master); + reset_control_deassert(master->core_rst); + + dw_i3c_master_set_intr_regs(master); + dw_i3c_master_restore_timing_regs(master); + dw_i3c_master_restore_addrs(master); + + dw_i3c_master_enable(master); + return 0; +} + +static const struct dev_pm_ops dw_i3c_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(dw_i3c_master_runtime_suspend, dw_i3c_master_runtime_resume, NULL) +}; + static const struct of_device_id dw_i3c_master_of_match[] = { { .compatible = "snps,dw-i3c-master-1.00a", }, {}, @@ -1568,6 +1754,7 @@ static struct platform_driver dw_i3c_driver = { .driver = { .name = "dw-i3c-master", .of_match_table = dw_i3c_master_of_match, + .pm = &dw_i3c_pm_ops, }, }; module_platform_driver(dw_i3c_driver); diff --git a/drivers/i3c/master/dw-i3c-master.h b/drivers/i3c/master/dw-i3c-master.h index 4ab94aa72252e4db7a55ffe5a989f86ca769f5d1..219ff815d3a7349d274fc8e7f2704cbcad07e968 100644 --- a/drivers/i3c/master/dw-i3c-master.h +++ b/drivers/i3c/master/dw-i3c-master.h @@ -19,11 +19,13 @@ struct dw_i3c_master_caps { struct dw_i3c_dat_entry { u8 addr; + bool is_i2c_addr; struct i3c_dev_desc *ibi_dev; }; struct dw_i3c_master { struct i3c_master_controller base; + struct device *dev; u16 maxdevs; u16 datstartaddr; u32 free_pos; @@ -36,10 +38,18 @@ struct dw_i3c_master { void __iomem *regs; struct reset_control *core_rst; struct clk *core_clk; + struct clk *pclk; char version[5]; char type[5]; - bool ibi_capable; - + u32 sir_rej_mask; + bool i2c_slv_prsnt; + u32 dev_addr; + u32 i3c_pp_timing; + u32 i3c_od_timing; + u32 ext_lcnt_timing; + u32 bus_free_timing; + u32 i2c_fm_timing; + u32 i2c_fmp_timing; /* * Per-device hardware data, used to manage the device address table * (DAT) diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index d7e966a255833730c40728725e5f6c3817083cd9..4e7d6a43ee9b3b5b74f68934c93b7689c3f3d752 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -631,6 +631,7 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id) static int i3c_hci_init(struct i3c_hci *hci) { u32 regval, offset; + bool size_in_dwords; int ret; /* Validate HCI hardware version */ @@ -654,11 +655,16 @@ static int i3c_hci_init(struct i3c_hci *hci) hci->caps = reg_read(HC_CAPABILITIES); DBG("caps = %#x", hci->caps); + size_in_dwords = hci->version_major < 1 || + (hci->version_major == 1 && hci->version_minor < 1); + regval = reg_read(DAT_SECTION); offset = FIELD_GET(DAT_TABLE_OFFSET, regval); hci->DAT_regs = offset ? hci->base_regs + offset : NULL; hci->DAT_entries = FIELD_GET(DAT_TABLE_SIZE, regval); hci->DAT_entry_size = FIELD_GET(DAT_ENTRY_SIZE, regval) ? 0 : 8; + if (size_in_dwords) + hci->DAT_entries = 4 * hci->DAT_entries / hci->DAT_entry_size; dev_info(&hci->master.dev, "DAT: %u %u-bytes entries at offset %#x\n", hci->DAT_entries, hci->DAT_entry_size, offset); @@ -667,6 +673,8 @@ static int i3c_hci_init(struct i3c_hci *hci) hci->DCT_regs = offset ? hci->base_regs + offset : NULL; hci->DCT_entries = FIELD_GET(DCT_TABLE_SIZE, regval); hci->DCT_entry_size = FIELD_GET(DCT_ENTRY_SIZE, regval) ? 0 : 16; + if (size_in_dwords) + hci->DCT_entries = 4 * hci->DCT_entries / hci->DCT_entry_size; dev_info(&hci->master.dev, "DCT: %u %u-bytes entries at offset %#x\n", hci->DCT_entries, hci->DCT_entry_size, offset); diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index 4e01a95cc4d0ad2485caf13fa6776374ce6d0e91..a918e96b21fddc584b39b06f4a60d6ab63f59f61 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -147,21 +147,6 @@ struct hci_dma_dev_ibi_data { unsigned int max_len; }; -static inline u32 lo32(dma_addr_t physaddr) -{ - return physaddr; -} - -static inline u32 hi32(dma_addr_t physaddr) -{ - /* trickery to avoid compiler warnings on 32-bit build targets */ - if (sizeof(dma_addr_t) > 4) { - u64 hi = physaddr; - return hi >> 32; - } - return 0; -} - static void hci_dma_cleanup(struct i3c_hci *hci) { struct hci_rings_data *rings = hci->io_data; @@ -265,10 +250,10 @@ static int hci_dma_init(struct i3c_hci *hci) if (!rh->xfer || !rh->resp || !rh->src_xfers) goto err_out; - rh_reg_write(CMD_RING_BASE_LO, lo32(rh->xfer_dma)); - rh_reg_write(CMD_RING_BASE_HI, hi32(rh->xfer_dma)); - rh_reg_write(RESP_RING_BASE_LO, lo32(rh->resp_dma)); - rh_reg_write(RESP_RING_BASE_HI, hi32(rh->resp_dma)); + rh_reg_write(CMD_RING_BASE_LO, lower_32_bits(rh->xfer_dma)); + rh_reg_write(CMD_RING_BASE_HI, upper_32_bits(rh->xfer_dma)); + rh_reg_write(RESP_RING_BASE_LO, lower_32_bits(rh->resp_dma)); + rh_reg_write(RESP_RING_BASE_HI, upper_32_bits(rh->resp_dma)); regval = FIELD_PREP(CR_RING_SIZE, rh->xfer_entries); rh_reg_write(CR_SETUP, regval); @@ -294,7 +279,17 @@ static int hci_dma_init(struct i3c_hci *hci) rh->ibi_chunk_sz = dma_get_cache_alignment(); rh->ibi_chunk_sz *= IBI_CHUNK_CACHELINES; - BUG_ON(rh->ibi_chunk_sz > 256); + /* + * Round IBI data chunk size to number of bytes supported by + * the HW. Chunk size can be 2^n number of DWORDs which is the + * same as 2^(n+2) bytes, where n is 0..6. + */ + rh->ibi_chunk_sz = umax(4, rh->ibi_chunk_sz); + rh->ibi_chunk_sz = roundup_pow_of_two(rh->ibi_chunk_sz); + if (rh->ibi_chunk_sz > 256) { + ret = -EINVAL; + goto err_out; + } ibi_status_ring_sz = rh->ibi_status_sz * rh->ibi_status_entries; ibi_data_ring_sz = rh->ibi_chunk_sz * rh->ibi_chunks_total; @@ -315,6 +310,11 @@ static int hci_dma_init(struct i3c_hci *hci) goto err_out; } + rh_reg_write(IBI_STATUS_RING_BASE_LO, lower_32_bits(rh->ibi_status_dma)); + rh_reg_write(IBI_STATUS_RING_BASE_HI, upper_32_bits(rh->ibi_status_dma)); + rh_reg_write(IBI_DATA_RING_BASE_LO, lower_32_bits(rh->ibi_data_dma)); + rh_reg_write(IBI_DATA_RING_BASE_HI, upper_32_bits(rh->ibi_data_dma)); + regval = FIELD_PREP(IBI_STATUS_RING_SIZE, rh->ibi_status_entries) | FIELD_PREP(IBI_DATA_CHUNK_SIZE, @@ -404,8 +404,8 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci, hci_dma_unmap_xfer(hci, xfer_list, i); return -ENOMEM; } - *ring_data++ = lo32(xfer->data_dma); - *ring_data++ = hi32(xfer->data_dma); + *ring_data++ = lower_32_bits(xfer->data_dma); + *ring_data++ = upper_32_bits(xfer->data_dma); } else { *ring_data++ = 0; *ring_data++ = 0; diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index bb299ce02cccbdb46541f2fcb7ab749843e5b98e..0a68fd1b81d4e90c653ed6d0b1760de51c93aef9 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -790,7 +790,20 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, int ret, i; while (true) { - /* Enter/proceed with DAA */ + /* SVC_I3C_MCTRL_REQUEST_PROC_DAA have two mode, ENTER DAA or PROCESS DAA. + * + * ENTER DAA: + * 1 will issue START, 7E, ENTDAA, and then emits 7E/R to process first target. + * 2 Stops just before the new Dynamic Address (DA) is to be emitted. + * + * PROCESS DAA: + * 1 The DA is written using MWDATAB or ADDR bits 6:0. + * 2 ProcessDAA is requested again to write the new address, and then starts the + * next (START, 7E, ENTDAA) unless marked to STOP; an MSTATUS indicating NACK + * means DA was not accepted (e.g. parity error). If PROCESSDAA is NACKed on the + * 7E/R, which means no more Slaves need a DA, then a COMPLETE will be signaled + * (along with DONE), and a STOP issued automatically. + */ writel(SVC_I3C_MCTRL_REQUEST_PROC_DAA | SVC_I3C_MCTRL_TYPE_I3C | SVC_I3C_MCTRL_IBIRESP_NACK | @@ -807,7 +820,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, SVC_I3C_MSTATUS_MCTRLDONE(reg), 1, 1000); if (ret) - return ret; + break; if (SVC_I3C_MSTATUS_RXPEND(reg)) { u8 data[6]; @@ -819,7 +832,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, */ ret = svc_i3c_master_readb(master, data, 6); if (ret) - return ret; + break; for (i = 0; i < 6; i++) prov_id[dev_nb] |= (u64)(data[i]) << (8 * (5 - i)); @@ -827,7 +840,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, /* We do not care about the BCR and DCR yet */ ret = svc_i3c_master_readb(master, data, 2); if (ret) - return ret; + break; } else if (SVC_I3C_MSTATUS_MCTRLDONE(reg)) { if (SVC_I3C_MSTATUS_STATE_IDLE(reg) && SVC_I3C_MSTATUS_COMPLETE(reg)) { @@ -835,12 +848,23 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, * All devices received and acked they dynamic * address, this is the natural end of the DAA * procedure. + * + * Hardware will auto emit STOP at this case. */ - break; + *count = dev_nb; + return 0; + } else if (SVC_I3C_MSTATUS_NACKED(reg)) { /* No I3C devices attached */ - if (dev_nb == 0) + if (dev_nb == 0) { + /* + * Hardware can't treat first NACK for ENTAA as normal + * COMPLETE. So need manual emit STOP. + */ + ret = 0; + *count = 0; break; + } /* * A slave device nacked the address, this is @@ -849,8 +873,10 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, * answer again immediately and shall ack the * address this time. */ - if (prov_id[dev_nb] == nacking_prov_id) - return -EIO; + if (prov_id[dev_nb] == nacking_prov_id) { + ret = -EIO; + break; + } dev_nb--; nacking_prov_id = prov_id[dev_nb]; @@ -858,7 +884,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, continue; } else { - return -EIO; + break; } } @@ -870,12 +896,12 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, SVC_I3C_MSTATUS_BETWEEN(reg), 0, 1000); if (ret) - return ret; + break; /* Give the slave device a suitable dynamic address */ ret = i3c_master_get_free_addr(&master->base, last_addr + 1); if (ret < 0) - return ret; + break; addrs[dev_nb] = ret; dev_dbg(master->dev, "DAA: device %d assigned to 0x%02x\n", @@ -885,9 +911,9 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, last_addr = addrs[dev_nb++]; } - *count = dev_nb; - - return 0; + /* Need manual issue STOP except for Complete condition */ + svc_i3c_master_emit_stop(master); + return ret; } static int svc_i3c_update_ibirules(struct svc_i3c_master *master) @@ -961,11 +987,10 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m) spin_lock_irqsave(&master->xferqueue.lock, flags); ret = svc_i3c_master_do_daa_locked(master, addrs, &dev_nb); spin_unlock_irqrestore(&master->xferqueue.lock, flags); - if (ret) { - svc_i3c_master_emit_stop(master); - svc_i3c_master_clear_merrwarn(master); + + svc_i3c_master_clear_merrwarn(master); + if (ret) goto rpm_out; - } /* Register all devices who participated to the core */ for (i = 0; i < dev_nb; i++) { @@ -1052,29 +1077,59 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, u8 *in, const u8 *out, unsigned int xfer_len, unsigned int *actual_len, bool continued) { + int retry = 2; u32 reg; int ret; /* clean SVC_I3C_MINT_IBIWON w1c bits */ writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); - writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | - xfer_type | - SVC_I3C_MCTRL_IBIRESP_NACK | - SVC_I3C_MCTRL_DIR(rnw) | - SVC_I3C_MCTRL_ADDR(addr) | - SVC_I3C_MCTRL_RDTERM(*actual_len), - master->regs + SVC_I3C_MCTRL); - ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, + while (retry--) { + writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | + xfer_type | + SVC_I3C_MCTRL_IBIRESP_NACK | + SVC_I3C_MCTRL_DIR(rnw) | + SVC_I3C_MCTRL_ADDR(addr) | + SVC_I3C_MCTRL_RDTERM(*actual_len), + master->regs + SVC_I3C_MCTRL); + + ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, SVC_I3C_MSTATUS_MCTRLDONE(reg), 0, 1000); - if (ret) - goto emit_stop; + if (ret) + goto emit_stop; - if (readl(master->regs + SVC_I3C_MERRWARN) & SVC_I3C_MERRWARN_NACK) { - ret = -ENXIO; - *actual_len = 0; - goto emit_stop; + if (readl(master->regs + SVC_I3C_MERRWARN) & SVC_I3C_MERRWARN_NACK) { + /* + * According to I3C Spec 1.1.1, 11-Jun-2021, section: 5.1.2.2.3. + * If the Controller chooses to start an I3C Message with an I3C Dynamic + * Address, then special provisions shall be made because that same I3C + * Target may be initiating an IBI or a Controller Role Request. So, one of + * three things may happen: (skip 1, 2) + * + * 3. The Addresses match and the RnW bits also match, and so neither + * Controller nor Target will ACK since both are expecting the other side to + * provide ACK. As a result, each side might think it had "won" arbitration, + * but neither side would continue, as each would subsequently see that the + * other did not provide ACK. + * ... + * For either value of RnW: Due to the NACK, the Controller shall defer the + * Private Write or Private Read, and should typically transmit the Target + * Address again after a Repeated START (i.e., the next one or any one prior + * to a STOP in the Frame). Since the Address Header following a Repeated + * START is not arbitrated, the Controller will always win (see Section + * 5.1.2.2.4). + */ + if (retry && addr != 0x7e) { + writel(SVC_I3C_MERRWARN_NACK, master->regs + SVC_I3C_MERRWARN); + } else { + ret = -ENXIO; + *actual_len = 0; + goto emit_stop; + } + } else { + break; + } } /* @@ -1321,7 +1376,7 @@ static int svc_i3c_master_send_direct_ccc_cmd(struct svc_i3c_master *master, cmd->addr = ccc->dests[0].addr; cmd->rnw = ccc->rnw; cmd->in = ccc->rnw ? ccc->dests[0].payload.data : NULL; - cmd->out = ccc->rnw ? NULL : ccc->dests[0].payload.data, + cmd->out = ccc->rnw ? NULL : ccc->dests[0].payload.data; cmd->len = xfer_len; cmd->actual_len = actual_len; cmd->continued = false; diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 95fa857e8aadfe3a88d9db85fd70940ff7c624ed..426e3c9f88a16ee6c60f85fef6ef6a95a9ee97e1 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -564,13 +564,11 @@ static int tiadc_parse_dt(struct platform_device *pdev, struct tiadc_device *adc_dev) { struct device_node *node = pdev->dev.of_node; - struct property *prop; - const __be32 *cur; int channels = 0; u32 val; int i; - of_property_for_each_u32(node, "ti,adc-channels", prop, cur, val) { + of_property_for_each_u32(node, "ti,adc-channels", val) { adc_dev->channel_line[channels] = val; /* Set Default values for optional DT parameters */ diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 14b53dac1253bfed5ad2dbd9b1dc22cdb9b80c28..6b04a674f832a048a3e021f92517d8529d27f652 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -46,6 +46,9 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, return 0; if (mt) return mt->num_slots != num_slots ? -EINVAL : 0; + /* Arbitrary limit for avoiding too large memory allocation. */ + if (num_slots > 1024) + return -EINVAL; mt = kzalloc(struct_size(mt, slots, num_slots), GFP_KERNEL); if (!mt) diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index 7cb26929dc7328930e09be8416ca67aa9c871a7d..9dc25eb2be445f9d8159240bcbcf3d932366bee0 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -871,7 +871,7 @@ static void cyttsp4_get_mt_touches(struct cyttsp4_mt_data *md, int num_cur_tch) struct cyttsp4_touch tch; int sig; int i, j, t = 0; - int ids[max(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)]; + int ids[MAX(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)]; memset(ids, 0, si->si_ofs.tch_abs[CY_TCH_T].max * sizeof(int)); for (i = 0; i < num_cur_tch; i++) { diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 9d9a7fde59e75dcbdbff9fecd5cda23fc9c3faf7..1074ee25064d06024b2f7857522bbc791abed8ba 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -588,9 +588,9 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, - cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, - cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES; + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE; + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; cfg->tlb = &v1_flush_ops; pgtable->iop.ops.map_pages = iommu_v1_map_pages; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 4b2994b6126df5e2b9bf73b42d204d1660ccaa81..2fce4f6d4e1b234167313c22e530f4ad07930b5d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -277,7 +277,7 @@ static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, */ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || of_device_is_compatible(np, "nvidia,tegra194-smmu")) { - smmu->pgsize_bitmap = PAGE_SIZE; + smmu->pgsize_bitmap &= GENMASK(PAGE_SHIFT, 0); pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; } diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e00c30766fd060ff77b8017934645..81e9cc6e3164a49b3e85a8f96b1d1b2ed6081aaf 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -170,6 +170,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + return; } /* diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9a7ec5997c61c5f862a03b3b5fd8cd637e35e14a..3214a4c17c6b3b0883742be3f30e081e6c7f7d32 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -526,7 +526,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e2913330d201c0ab6eb792625a538b1797..222cfc11ebfd00bdefdebed0e7eb967a6f42fe9a 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index ba53571a823905bcd00170506260cd918f967c33..a2f4ffe6d9491c1e495439cd031592dacbdaef1b 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -232,8 +232,8 @@ static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) pgt_size = sprd_iommu_pgt_size(&dom->domain); dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); - dom->sdev = NULL; sprd_iommu_hw_en(dom->sdev, false); + dom->sdev = NULL; } static void sprd_iommu_domain_free(struct iommu_domain *domain) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 072bd227b6c677b40fee80f99e1364d733e681a3..4525366d16d6152886379273391f56ad45149f37 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -111,8 +111,6 @@ static void __init aic_common_ext_irq_of_init(struct irq_domain *domain) struct device_node *node = irq_domain_get_of_node(domain); struct irq_chip_generic *gc; struct aic_chip_data *aic; - struct property *prop; - const __be32 *p; u32 hwirq; gc = irq_get_domain_generic_chip(domain, 0); @@ -120,7 +118,7 @@ static void __init aic_common_ext_irq_of_init(struct irq_domain *domain) aic = gc->private; aic->ext_irqs |= 1; - of_property_for_each_u32(node, "atmel,external-irqs", prop, p, hwirq) { + of_property_for_each_u32(node, "atmel,external-irqs", hwirq) { gc = irq_get_domain_generic_chip(domain, hwirq); if (!gc) { pr_warn("AIC: external irq %d >= %d skip it\n", diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 9d8f2c40604310d6d92fdea667c2ea0322ece015..b35903a06902f73e719626205b0ba765d2319269 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -18,11 +18,13 @@ struct fwnode_handle *cpuintc_handle; static u32 lpic_gsi_to_irq(u32 gsi) { + int irq = 0; + /* Only pch irqdomain transferring is required for LoongArch. */ if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ) - return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); - return 0; + return (irq > 0) ? irq : 0; } static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 093fd42893a7aa7a06d353eebf7121f9e1382f36..53cc08387588f106fb696050b8212a268e948517 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -64,6 +64,20 @@ struct mbigen_device { void __iomem *base; }; +static inline unsigned int get_mbigen_node_offset(unsigned int nid) +{ + unsigned int offset = nid * MBIGEN_NODE_OFFSET; + + /* + * To avoid touched clear register in unexpected way, we need to directly + * skip clear register when access to more than 10 mbigen nodes. + */ + if (nid >= (REG_MBIGEN_CLEAR_OFFSET / MBIGEN_NODE_OFFSET)) + offset += MBIGEN_NODE_OFFSET; + + return offset; +} + static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) { unsigned int nid, pin; @@ -72,8 +86,7 @@ static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) nid = hwirq / IRQS_PER_MBIGEN_NODE + 1; pin = hwirq % IRQS_PER_MBIGEN_NODE; - return pin * 4 + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_VEC_OFFSET; + return pin * 4 + get_mbigen_node_offset(nid) + REG_MBIGEN_VEC_OFFSET; } static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, @@ -88,8 +101,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, *mask = 1 << (irq_ofst % 32); ofst = irq_ofst / 32 * 4; - *addr = ofst + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_TYPE_OFFSET; + *addr = ofst + get_mbigen_node_offset(nid) + REG_MBIGEN_TYPE_OFFSET; } static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq, diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index 27e30ce41db3a7d3a1f650cf91076439951b6b41..cd789fa5151909342271f46183aecceb6b007f84 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -178,7 +178,7 @@ struct meson_gpio_irq_controller { void __iomem *base; u32 channel_irqs[MAX_NUM_CHANNEL]; DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); - spinlock_t lock; + raw_spinlock_t lock; }; static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, @@ -187,14 +187,14 @@ static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, unsigned long flags; u32 tmp; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); tmp = readl_relaxed(ctl->base + reg); tmp &= ~mask; tmp |= val; writel_relaxed(tmp, ctl->base + reg); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); } static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) @@ -244,12 +244,12 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, unsigned long flags; unsigned int idx; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); /* Find a free channel */ idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); if (idx >= ctl->params->nr_channels) { - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); pr_err("No channel available\n"); return -ENOSPC; } @@ -257,7 +257,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* Mark the channel as used */ set_bit(idx, ctl->channel_map); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); /* * Setup the mux of the channel to route the signal of the pad @@ -567,7 +567,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node * if (!ctl) return -ENOMEM; - spin_lock_init(&ctl->lock); + raw_spin_lock_init(&ctl->lock); ctl->base = of_iomap(node, 0); if (!ctl->base) { diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c index 1d9bb28d13e5d95c93085533938382825cea7a07..eb6ca516a166454eafa80c805d08857020133f21 100644 --- a/drivers/irqchip/irq-pic32-evic.c +++ b/drivers/irqchip/irq-pic32-evic.c @@ -161,9 +161,9 @@ static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq, return ret; } -int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type) +static int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) { struct evic_chip_data *priv = d->host_data; @@ -190,13 +190,11 @@ static void __init pic32_ext_irq_of_init(struct irq_domain *domain) { struct device_node *node = irq_domain_get_of_node(domain); struct evic_chip_data *priv = domain->host_data; - struct property *prop; - const __le32 *p; u32 hwirq; int i = 0; const char *pname = "microchip,external-irqs"; - of_property_for_each_u32(node, pname, prop, p, hwirq) { + of_property_for_each_u32(node, pname, hwirq) { if (i >= ARRAY_SIZE(priv->ext_irqs)) { pr_warn("More than %d external irq, skip rest\n", ARRAY_SIZE(priv->ext_irqs)); diff --git a/drivers/irqchip/irq-riscv-aplic-msi.c b/drivers/irqchip/irq-riscv-aplic-msi.c index 028444af48bd57d4dd39bd1391cddac5d14a17a8..d7773f76e5d0a531e4b43332a18b1c4f4d248987 100644 --- a/drivers/irqchip/irq-riscv-aplic-msi.c +++ b/drivers/irqchip/irq-riscv-aplic-msi.c @@ -32,15 +32,10 @@ static void aplic_msi_irq_unmask(struct irq_data *d) aplic_irq_unmask(d); } -static void aplic_msi_irq_eoi(struct irq_data *d) +static void aplic_msi_irq_retrigger_level(struct irq_data *d) { struct aplic_priv *priv = irq_data_get_irq_chip_data(d); - /* - * EOI handling is required only for level-triggered interrupts - * when APLIC is in MSI mode. - */ - switch (irqd_get_trigger_type(d)) { case IRQ_TYPE_LEVEL_LOW: case IRQ_TYPE_LEVEL_HIGH: @@ -59,6 +54,29 @@ static void aplic_msi_irq_eoi(struct irq_data *d) } } +static void aplic_msi_irq_eoi(struct irq_data *d) +{ + /* + * EOI handling is required only for level-triggered interrupts + * when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); +} + +static int aplic_msi_irq_set_type(struct irq_data *d, unsigned int type) +{ + int rc = aplic_irq_set_type(d, type); + + if (rc) + return rc; + /* + * Updating sourcecfg register for level-triggered interrupts + * requires interrupt retriggering when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); + return 0; +} + static void aplic_msi_write_msg(struct irq_data *d, struct msi_msg *msg) { unsigned int group_index, hart_index, guest_index, val; @@ -130,7 +148,7 @@ static const struct msi_domain_template aplic_msi_template = { .name = "APLIC-MSI", .irq_mask = aplic_msi_irq_mask, .irq_unmask = aplic_msi_irq_unmask, - .irq_set_type = aplic_irq_set_type, + .irq_set_type = aplic_msi_irq_set_type, .irq_eoi = aplic_msi_irq_eoi, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/drivers/irqchip/irq-sun6i-r.c b/drivers/irqchip/irq-sun6i-r.c index a01e440494154ecbf93e07a0fcb5f052ecb478a9..99958d470d6268cb5654a333b34eeb31e7ec37fc 100644 --- a/drivers/irqchip/irq-sun6i-r.c +++ b/drivers/irqchip/irq-sun6i-r.c @@ -270,7 +270,7 @@ static const struct irq_domain_ops sun6i_r_intc_domain_ops = { static int sun6i_r_intc_suspend(void) { - u32 buf[BITS_TO_U32(max(SUN6I_NR_TOP_LEVEL_IRQS, SUN6I_NR_MUX_BITS))]; + u32 buf[BITS_TO_U32(MAX(SUN6I_NR_TOP_LEVEL_IRQS, SUN6I_NR_MUX_BITS))]; int i; /* Wake IRQs are enabled during system sleep and shutdown. */ diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 238d3d3449496975ec9e1164cc898ac7baf02e54..7e08714d507f4728b1a53423dfc9c5c2fb87188f 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -189,7 +189,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, irqc->intr_mask = 0; } - if (irqc->intr_mask >> irqc->nr_irq) + if ((u64)irqc->intr_mask >> irqc->nr_irq) pr_warn("irq-xilinx: mismatch in kind-of-intr param\n"); pr_info("irq-xilinx: %pOF: num_irq=%d, edge=0x%x\n", diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 48ac628f471b3ba8390992b99bbd94e522669965..51e6964c130548ddc2a596e8b1e14f7a007310dc 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1776,7 +1776,7 @@ static void integrity_metadata(struct work_struct *w) struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); char *checksums; unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; - char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; sector_t sector; unsigned int sectors_to_process; @@ -2064,7 +2064,7 @@ retry_kmap: } while (++s < ic->sectors_per_block); #ifdef INTERNAL_VERIFY if (ic->internal_hash) { - char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { @@ -2837,7 +2837,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start unlikely(from_replay) && #endif ic->internal_hash) { - char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + char test_tag[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), (char *)access_journal_data(ic, i, l), test_tag); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c2c07bfa647199139f78522c5c87da3e170e6cee..f299ff393a6a2ceda77bcc566f4d121123d9508c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *param) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - if (!dm_suspended_md(md)) - dm_suspend(md, suspend_flags); + if (!dm_suspended_md(md)) { + r = dm_suspend(md, suspend_flags); + if (r) { + down_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (hc && !hc->new_map) { + hc->new_map = new_map; + new_map = NULL; + } else { + r = -ENXIO; + } + up_write(&_hash_lock); + if (new_map) { + dm_sync_table(md); + dm_table_destroy(new_map); + } + dm_put(md); + return r; + } + } old_size = dm_get_size(md); old_map = dm_swap_table(md, new_map); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 97fab2087df86d5b0fba0dc4ce5125499e4b4ca6..87bb903034358234b3112bc9695a9b46403838d7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2737,7 +2737,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } @@ -2762,7 +2762,7 @@ static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_st break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 04698fd03e6069eb05ddce43aa13cd9f96fdbf23..d48c4fafc779892fbcf66113bb3bce51b98054eb 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct dm_space_map *sm) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - kfree(smm); + kvfree(smm); } static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) @@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; - smm = kmalloc(sizeof(*smm), GFP_KERNEL); + smm = kvmalloc(sizeof(*smm), GFP_KERNEL); if (!smm) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7acfe7c9dc8daa6757f66b287ddc59b5c5614cd4..761989d6790683317102cab48efd20e96adb2f89 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio) +{ + return !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors; +} + static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || - !test_bit(WriteMostly, &rdev->flags)) + !test_bit(WriteMostly, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio)) continue; /* there are no bad blocks, we can use this disk */ @@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) if (!rdev || test_bit(Faulty, &rdev->flags)) return false; - /* still in recovery */ - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + if (rdev_in_recovery(rdev, r1_bio)) return false; /* don't read from slow disk unless have to */ diff --git a/drivers/media/dvb-frontends/stv0367_priv.h b/drivers/media/dvb-frontends/stv0367_priv.h index 617f605947b2c47e74d95eb3392d61d82ab9619b..7f056d1cce8228d97ce503905b3c673afdf153f8 100644 --- a/drivers/media/dvb-frontends/stv0367_priv.h +++ b/drivers/media/dvb-frontends/stv0367_priv.h @@ -25,8 +25,11 @@ #endif /* MACRO definitions */ +#ifndef MIN #define MAX(X, Y) ((X) >= (Y) ? (X) : (Y)) #define MIN(X, Y) ((X) <= (Y) ? (X) : (Y)) +#endif + #define INRANGE(X, Y, Z) \ ((((X) <= (Y)) && ((Y) <= (Z))) || \ (((Z) <= (Y)) && ((Y) <= (X))) ? 1 : 0) diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig index 154343080c82a64106f0fca20cdf54cb9c2a21f4..40e20f0aa5ae5d164c8c29fc06c8cd11acc57fc0 100644 --- a/drivers/media/pci/intel/ipu6/Kconfig +++ b/drivers/media/pci/intel/ipu6/Kconfig @@ -3,13 +3,14 @@ config VIDEO_INTEL_IPU6 depends on ACPI || COMPILE_TEST depends on VIDEO_DEV depends on X86 && X86_64 && HAS_DMA + depends on IPU_BRIDGE || !IPU_BRIDGE + select AUXILIARY_BUS select DMA_OPS select IOMMU_IOVA select VIDEO_V4L2_SUBDEV_API select MEDIA_CONTROLLER select VIDEOBUF2_DMA_CONTIG select V4L2_FWNODE - select IPU_BRIDGE help This is the 6th Gen Intel Image Processing Unit, found in Intel SoCs and used for capturing images and video from camera sensors. diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 52aea41677183797d06ce632d28356fed59c870a..717c441b4a86541f0471084aafd6e9bd38fdf7ee 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -828,8 +828,10 @@ struct rc_dev *rc_dev_get_from_fd(int fd, bool write) return ERR_PTR(-EINVAL); } - if (write && !(f.file->f_mode & FMODE_WRITE)) + if (write && !(f.file->f_mode & FMODE_WRITE)) { + fdput(f); return ERR_PTR(-EPERM); + } fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c index 22d83ac18eb7353279b5a5e6e5ba8a8c8f53d924..fbf58012becdf2a40716d68bfc1c17136f423709 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-init.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c @@ -23,40 +23,11 @@ static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0)."); -static int dvb_usb_check_bulk_endpoint(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - int ret; - - ret = usb_pipe_type_check(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - ret = usb_pipe_type_check(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - } - return 0; -} - -static void dvb_usb_clear_halt(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - } -} - static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) { struct dvb_usb_adapter *adap; int ret, n, o; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint); - if (ret) - return ret; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint_response); - if (ret) - return ret; for (n = 0; n < d->props.num_adapters; n++) { adap = &d->adapter[n]; adap->dev = d; @@ -132,8 +103,10 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) * when reloading the driver w/o replugging the device * sometimes a timeout occurs, this helps */ - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint); - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint_response); + if (d->props.generic_bulk_ctrl_endpoint != 0) { + usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + } return 0; diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 0136df5732bac5d6600f035641e07689e14782cd..4fe26e82e3d1c167bd59c5decf79a8391ae465a4 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2680,6 +2680,10 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, for (i = 0; i < ARRAY_SIZE(uvc_ctrl_mappings); ++i) { const struct uvc_control_mapping *mapping = &uvc_ctrl_mappings[i]; + if (!uvc_entity_match_guid(ctrl->entity, mapping->entity) || + ctrl->info.selector != mapping->selector) + continue; + /* Let the device provide a custom mapping. */ if (mapping->filter_mapping) { mapping = mapping->filter_mapping(chain, ctrl); @@ -2687,9 +2691,7 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, continue; } - if (uvc_entity_match_guid(ctrl->entity, mapping->entity) && - ctrl->info.selector == mapping->selector) - __uvc_ctrl_add_mapping(chain, ctrl, mapping); + __uvc_ctrl_add_mapping(chain, ctrl, mapping); } } diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 4bbd542d753e4d89272924c5e097c0a2a6d885cf..0c1364d88469f53c1486d5119c7451e75c80b86b 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -119,8 +119,6 @@ static int ti_tscadc_probe(struct platform_device *pdev) struct clk *clk; struct device_node *node; struct mfd_cell *cell; - struct property *prop; - const __be32 *cur; bool use_tsc = false, use_mag = false; u32 val; int err; @@ -167,7 +165,7 @@ static int ti_tscadc_probe(struct platform_device *pdev) } node = of_get_child_by_name(pdev->dev.of_node, "adc"); - of_property_for_each_u32(node, "ti,adc-channels", prop, cur, val) { + of_property_for_each_u32(node, "ti,adc-channels", val) { adc_channels++; if (val > 7) { dev_err(&pdev->dev, " PIN numbers are 0..7 (not %d)\n", diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 41c3d2821a78505ce74eca2012700368c7381305..41c54051347ab7fc3bb8691595b35bdb67a7d40e 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -587,7 +587,7 @@ config NSM config MARVELL_CN10K_DPI tristate "Octeon CN10K DPI driver" - depends on PCI + depends on PCI && PCI_IOV depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Enables Octeon CN10K DMA packet interface (DPI) driver which diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index d4aeeb2b2169204183ddfeeafcbb58691222b24b..89224d4af4a201b0e6b7a4437cc90eb95c6e7b11 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -233,6 +233,49 @@ static void ee1004_cleanup_bus_data(void *data) mutex_unlock(&ee1004_bus_lock); } +static int ee1004_init_bus_data(struct i2c_client *client) +{ + struct ee1004_bus_data *bd; + int err, cnr = 0; + + bd = ee1004_get_bus_data(client->adapter); + if (!bd) + return dev_err_probe(&client->dev, -ENOSPC, "Only %d busses supported", + EE1004_MAX_BUSSES); + + i2c_set_clientdata(client, bd); + + if (++bd->dev_count == 1) { + /* Use 2 dummy devices for page select command */ + for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { + struct i2c_client *cl; + + cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_out; + } + + bd->set_page[cnr] = cl; + } + + /* Remember current page to avoid unneeded page select */ + err = ee1004_get_current_page(bd); + if (err < 0) + goto err_out; + + dev_dbg(&client->dev, "Currently selected page: %d\n", err); + bd->current_page = err; + } + + return 0; + +err_out: + ee1004_cleanup(cnr, bd); + + return err; +} + static int ee1004_probe(struct i2c_client *client) { struct nvmem_config config = { @@ -251,9 +294,8 @@ static int ee1004_probe(struct i2c_client *client) .compat = true, .base_dev = &client->dev, }; - struct ee1004_bus_data *bd; struct nvmem_device *ndev; - int err, cnr = 0; + int err; /* Make sure we can operate on this adapter */ if (!i2c_check_functionality(client->adapter, @@ -264,46 +306,21 @@ static int ee1004_probe(struct i2c_client *client) mutex_lock(&ee1004_bus_lock); - bd = ee1004_get_bus_data(client->adapter); - if (!bd) { + err = ee1004_init_bus_data(client); + if (err < 0) { mutex_unlock(&ee1004_bus_lock); - return dev_err_probe(&client->dev, -ENOSPC, - "Only %d busses supported", EE1004_MAX_BUSSES); - } - - err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, bd); - if (err < 0) return err; - - i2c_set_clientdata(client, bd); - - if (++bd->dev_count == 1) { - /* Use 2 dummy devices for page select command */ - for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { - struct i2c_client *cl; - - cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); - if (IS_ERR(cl)) { - mutex_unlock(&ee1004_bus_lock); - return PTR_ERR(cl); - } - bd->set_page[cnr] = cl; - } - - /* Remember current page to avoid unneeded page select */ - err = ee1004_get_current_page(bd); - if (err < 0) { - mutex_unlock(&ee1004_bus_lock); - return err; - } - dev_dbg(&client->dev, "Currently selected page: %d\n", err); - bd->current_page = err; } ee1004_probe_temp_sensor(client); mutex_unlock(&ee1004_bus_lock); + err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, + i2c_get_clientdata(client)); + if (err < 0) + return err; + ndev = devm_nvmem_register(&client->dev, &config); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 5204fda51da3fc497b62c51de6c7d74987339f40..339d126414d4bf842d08c593e15bd6a3954bbf38 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2085,16 +2085,6 @@ err_invoke: return err; } -static int is_attach_rejected(struct fastrpc_user *fl) -{ - /* Check if the device node is non-secure */ - if (!fl->is_secure_dev) { - dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); - return -EACCES; - } - return 0; -} - static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2107,19 +2097,13 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, ROOT_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, SENSORS_PD); + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_create_static_process(fl, argp); + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 5cd488f54cfa542ec3370030b0afcda2d75335c5..8f744bee6fbde83f196d71f0ce33447a0baeaeb5 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -182,6 +182,21 @@ static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) check_negative(&neg, 3); } +/* + * A refcount_sub_and_test() by zero when the counter is at zero should act like + * refcount_sub_and_test() above when going negative. + */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_ZERO(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_sub_and_test() at zero\n"); + if (refcount_sub_and_test(0, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + static void check_from_zero(refcount_t *ref) { switch (refcount_read(ref)) { @@ -400,6 +415,7 @@ static struct crashtype crashtypes[] = { CRASHTYPE(REFCOUNT_DEC_NEGATIVE), CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_ZERO), CRASHTYPE(REFCOUNT_INC_ZERO), CRASHTYPE(REFCOUNT_ADD_ZERO), CRASHTYPE(REFCOUNT_INC_SATURATED), diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index f82e3423acb9fb92c3d46bc92a1a1f6d31e66e83..60d0155be869f4bf2a0e9da5cd37985f76ff713f 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -390,7 +390,8 @@ int ubiblock_create(struct ubi_volume_info *vi) ret = blk_mq_alloc_tag_set(&dev->tag_set); if (ret) { - dev_err(disk_to_dev(dev->gd), "blk_mq_alloc_tag_set failed"); + pr_err("ubiblock%d_%d: blk_mq_alloc_tag_set failed\n", + dev->ubi_num, dev->vol_id); goto out_free_dev; } @@ -407,8 +408,8 @@ int ubiblock_create(struct ubi_volume_info *vi) gd->minors = 1; gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL); if (gd->first_minor < 0) { - dev_err(disk_to_dev(gd), - "block: dynamic minor allocation failed"); + pr_err("ubiblock%d_%d: block: dynamic minor allocation failed\n", + dev->ubi_num, dev->vol_id); ret = -ENODEV; goto out_cleanup_disk; } @@ -669,7 +670,7 @@ err_unreg: return ret; } -void __exit ubiblock_exit(void) +void ubiblock_exit(void) { ubi_unregister_volume_notifier(&ubiblock_notifier); ubiblock_remove_all(); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a7e3a6246c0e93309805c588391640936993bce5..30be4ed68fad297c3c8f6ba8f0214345f9ae3ffe 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -112,7 +112,7 @@ static struct attribute *ubi_class_attrs[] = { ATTRIBUTE_GROUPS(ubi_class); /* Root UBI "class" object (corresponds to '//class/ubi/') */ -struct class ubi_class = { +const struct class ubi_class = { .name = UBI_NAME_STR, .class_groups = ubi_class_groups, }; @@ -1372,7 +1372,7 @@ static int __init ubi_init(void) /* See comment above re-ubi_is_module(). */ if (ubi_is_module()) - goto out_slab; + goto out_debugfs; } register_mtd_user(&ubi_mtd_notifier); @@ -1387,6 +1387,9 @@ static int __init ubi_init(void) out_mtd_notifier: unregister_mtd_user(&ubi_mtd_notifier); + ubiblock_exit(); +out_debugfs: + ubi_debugfs_exit(); out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index d57f52bd2ff3cd7a20b293b3993874a4826cffb2..9ec3b8b6a0aa8f18eeb3e4837cb50b68ee258e5d 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -598,9 +598,9 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN, UBI_DFS_DIR_NAME, ubi->ubi_num); - if (n > UBI_DFS_DIR_LEN) { + if (n >= UBI_DFS_DIR_LEN) { /* The array size is too small */ return -EINVAL; } diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index e5ac3cd0bbae62494846a3514548ff163439163f..c7ba7a15c9f78b6214b245b5e8969c1214fbd4fd 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1564,6 +1564,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, GFP_KERNEL); if (!fm_eba[i]) { ret = -ENOMEM; + kfree(scan_eba[i]); goto out_free; } @@ -1599,7 +1600,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, } out_free: - for (i = 0; i < num_volumes; i++) { + while (--i >= 0) { if (!ubi->volumes[i]) continue; diff --git a/drivers/mtd/ubi/nvmem.c b/drivers/mtd/ubi/nvmem.c index 8aeb9c428e51011cfc146c30339d96bfdda2ea50..a94a1a9aaec10e4add3154e2bbec572f200439ac 100644 --- a/drivers/mtd/ubi/nvmem.c +++ b/drivers/mtd/ubi/nvmem.c @@ -6,7 +6,6 @@ /* UBI NVMEM provider */ #include "ubi.h" #include -#include /* List of all NVMEM devices */ static LIST_HEAD(nvmem_devices); @@ -27,14 +26,15 @@ static int ubi_nvmem_reg_read(void *priv, unsigned int from, struct ubi_nvmem *unv = priv; struct ubi_volume_desc *desc; uint32_t offs; - uint64_t lnum = from; + uint32_t lnum; int err = 0; desc = ubi_open_volume(unv->ubi_num, unv->vol_id, UBI_READONLY); if (IS_ERR(desc)) return PTR_ERR(desc); - offs = do_div(lnum, unv->usable_leb_size); + offs = from % unv->usable_leb_size; + lnum = from / unv->usable_leb_size; while (bytes_left) { to_read = unv->usable_leb_size - offs; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 32009a24869e7e49584e352ef37c52bba8c2cc4c..1c9e874e8edeae982b3f97ee643090fc97c93886 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -420,7 +420,7 @@ struct ubi_debug_info { unsigned int power_cut_min; unsigned int power_cut_max; unsigned int emulate_failures; - char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + char dfs_dir_name[UBI_DFS_DIR_LEN]; struct dentry *dfs_dir; struct dentry *dfs_chk_gen; struct dentry *dfs_chk_io; @@ -814,7 +814,7 @@ extern struct kmem_cache *ubi_wl_entry_slab; extern const struct file_operations ubi_ctrl_cdev_operations; extern const struct file_operations ubi_cdev_operations; extern const struct file_operations ubi_vol_cdev_operations; -extern struct class ubi_class; +extern const struct class ubi_class; extern struct mutex ubi_devices_mutex; extern struct blocking_notifier_head ubi_notifiers; diff --git a/drivers/net/can/usb/etas_es58x/es58x_devlink.c b/drivers/net/can/usb/etas_es58x/es58x_devlink.c index 635edeb8f68cdf5e33132027331fb77ea66dedbf..eee20839d96fd4142d43e26ddf86941051d7bb5a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_devlink.c +++ b/drivers/net/can/usb/etas_es58x/es58x_devlink.c @@ -215,7 +215,7 @@ static int es58x_devlink_info_get(struct devlink *devlink, struct es58x_sw_version *fw_ver = &es58x_dev->firmware_version; struct es58x_sw_version *bl_ver = &es58x_dev->bootloader_version; struct es58x_hw_revision *hw_rev = &es58x_dev->hardware_revision; - char buf[max(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; + char buf[MAX(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; int ret = 0; if (es58x_sw_version_is_valid(fw_ver)) { diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ed1e6560df25ec270d1716672f42f6861e06c443..0e663ec0c12a3bcbd865f29b13596dae3b6a6a30 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -675,8 +675,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) of_remove_property(child, prop); phydev = of_phy_find_device(child); - if (phydev) + if (phydev) { phy_device_remove(phydev); + phy_device_free(phydev); + } } err = mdiobus_register(priv->user_mii_bus); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b074b4bb062964850042634732438682f43e192d..1491099528be8d3dac61612f980f69c698c1beab 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2578,7 +2578,11 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; + case KSZ8567_CHIP_ID: case KSZ9477_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: /* KSZ9477 Errata DS80000754C * * Module 4: Energy Efficient Ethernet (EEE) feature select must @@ -2588,6 +2592,13 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) * controls. If not disabled, the PHY ports can auto-negotiate * to enable EEE, and this feature can cause link drops when * linked to another device supporting EEE. + * + * The same item appears in the errata for the KSZ9567, KSZ9896, + * and KSZ9897. + * + * A similar item appears in the errata for the KSZ8567, but + * provides an alternative workaround. For now, use the simple + * workaround of disabling the EEE feature for this device too. */ return MICREL_NO_EEE; } @@ -3764,6 +3775,11 @@ static int ksz_port_set_mac_address(struct dsa_switch *ds, int port, return -EBUSY; } + /* Need to initialize variable as the code to fill in settings may + * not be executed. + */ + wol.wolopts = 0; + ksz_get_wol(ds, dp->index, &wol); if (wol.wolopts & WAKE_MAGIC) { dev_err(ds->dev, diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index d9d3e30fd47ad4d357224be4024a7afe9940a795..e3f95d2cc2c1689557f463edcdea4f4fb559f6a2 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -40,6 +40,10 @@ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ +/* MII Block subblock */ +#define VSC73XX_BLOCK_MII_INTERNAL 0x0 /* Internal MDIO subblock */ +#define VSC73XX_BLOCK_MII_EXTERNAL 0x1 /* External MDIO subblock */ + #define CPU_PORT 6 /* CPU port */ /* MAC Block registers */ @@ -225,6 +229,8 @@ #define VSC73XX_MII_CMD 0x1 #define VSC73XX_MII_DATA 0x2 +#define VSC73XX_MII_STAT_BUSY BIT(3) + /* Arbiter block 5 registers */ #define VSC73XX_ARBEMPTY 0x0c #define VSC73XX_ARBDISC 0x0e @@ -299,6 +305,7 @@ #define IS_739X(a) (IS_7395(a) || IS_7398(a)) #define VSC73XX_POLL_SLEEP_US 1000 +#define VSC73XX_MDIO_POLL_SLEEP_US 5 #define VSC73XX_POLL_TIMEOUT_US 10000 struct vsc73xx_counter { @@ -527,6 +534,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc) return 0; } +static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc) +{ + int ret, err; + u32 val; + + ret = read_poll_timeout(vsc73xx_read, err, + err < 0 || !(val & VSC73XX_MII_STAT_BUSY), + VSC73XX_MDIO_POLL_SLEEP_US, + VSC73XX_POLL_TIMEOUT_US, false, vsc, + VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL, + VSC73XX_MII_STAT, &val); + if (ret) + return ret; + return err; +} + static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) { struct vsc73xx *vsc = ds->priv; @@ -534,12 +557,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) u32 val; int ret; + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + /* Setting bit 26 means "read" */ cmd = BIT(26) | (phy << 21) | (regnum << 16); ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; - msleep(2); + + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val); if (ret) return ret; @@ -563,18 +594,11 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, u32 cmd; int ret; - /* It was found through tedious experiments that this router - * chip really hates to have it's PHYs reset. They - * never recover if that happens: autonegotiation stops - * working after a reset. Just filter out this command. - * (Resetting the whole chip is OK.) - */ - if (regnum == 0 && (val & BIT(15))) { - dev_info(vsc->dev, "reset PHY - disallowed\n"); - return 0; - } + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; - cmd = (phy << 21) | (regnum << 16); + cmd = (phy << 21) | (regnum << 16) | val; ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; @@ -957,6 +981,11 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, if (duplex == DUPLEX_FULL) val |= VSC73XX_MAC_CFG_FDX; + else + /* In datasheet description ("Port Mode Procedure" in 5.6.2) + * this bit is configured only for half duplex. + */ + val |= VSC73XX_MAC_CFG_WEXC_DIS; /* This routine is described in the datasheet (below ARBDISC register * description) @@ -967,7 +996,6 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, get_random_bytes(&seed, 1); val |= seed << VSC73XX_MAC_CFG_SEED_OFFSET; val |= VSC73XX_MAC_CFG_SEED_LOAD; - val |= VSC73XX_MAC_CFG_WEXC_DIS; /* Those bits are responsible for MTU only. Kernel takes care about MTU, * let's enable +8 bytes frame length unconditionally. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ffa74c26ee5325526870c36b39212cc3a7b327cd..e27e1082ee33aea2cdd69bf0f286651c83bc940f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7591,19 +7591,20 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) int rx = bp->rx_nr_rings, stat; int vnic, grp = rx; - if (hw_resc->resv_tx_rings != bp->tx_nr_rings && - bp->hwrm_spec_code >= 0x10601) - return true; - /* Old firmware does not need RX ring reservations but we still * need to setup a default RSS map when needed. With new firmware * we go through RX ring reservations first and then set up the * RSS map for the successfully reserved RX rings when needed. */ - if (!BNXT_NEW_RM(bp)) { + if (!BNXT_NEW_RM(bp)) bnxt_check_rss_tbl_no_rmgr(bp); + + if (hw_resc->resv_tx_rings != bp->tx_nr_rings && + bp->hwrm_spec_code >= 0x10601) + return true; + + if (!BNXT_NEW_RM(bp)) return false; - } vnic = bnxt_get_total_vnics(bp, rx); @@ -7649,8 +7650,8 @@ static int bnxt_get_avail_msix(struct bnxt *bp, int num); static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int rx_rings, old_rx_rings, rc; int cp = bp->cp_nr_rings; - int rx_rings, rc; int ulp_msix = 0; bool sh = false; int tx_cp; @@ -7684,6 +7685,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.grp = bp->rx_nr_rings; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); hwr.stat = bnxt_get_func_stat_ctxs(bp); + old_rx_rings = bp->hw_resc.resv_rx_rings; rc = bnxt_hwrm_reserve_rings(bp, &hwr); if (rc) @@ -7738,7 +7740,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!bnxt_rings_ok(bp, &hwr)) return -ENOMEM; - if (!netif_is_rxfh_configured(bp->dev)) + if (old_rx_rings != bp->hw_resc.resv_rx_rings && + !netif_is_rxfh_configured(bp->dev)) bnxt_set_dflt_rss_indir_tbl(bp, NULL); if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d00ef0063820d74483f2c088f014d1f499baf77c..9dadc89378f02ee4ee628bb289366322eaa9d5f8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1863,8 +1863,14 @@ static void bnxt_modify_rss(struct bnxt *bp, struct ethtool_rxfh_context *ctx, } static int bnxt_rxfh_context_check(struct bnxt *bp, + const struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); return -EOPNOTSUPP; @@ -1888,7 +1894,7 @@ static int bnxt_create_rxfh_context(struct net_device *dev, struct bnxt_vnic_info *vnic; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1915,8 +1921,12 @@ static int bnxt_create_rxfh_context(struct net_device *dev, if (rc) goto out; + /* Populate defaults in the context */ bnxt_set_dflt_rss_indir_tbl(bp, ctx); + ctx->hfunc = ETH_RSS_HASH_TOP; memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + memcpy(ethtool_rxfh_context_key(ctx), + bp->rss_hash_key, HW_HASH_KEY_SIZE); rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); if (rc) { @@ -1953,7 +1963,7 @@ static int bnxt_modify_rxfh_context(struct net_device *dev, struct bnxt_rss_ctx *rss_ctx; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -5280,7 +5290,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, .cap_rss_ctx_supported = 1, - .rxfh_max_context_id = BNXT_MAX_ETH_RSS_CTX, + .rxfh_max_num_contexts = BNXT_MAX_ETH_RSS_CTX + 1, .rxfh_indir_space = BNXT_MAX_RSS_TABLE_ENTRIES_P5, .rxfh_priv_size = sizeof(struct bnxt_rss_ctx), .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1248792d7fd4d2f98847889c499277a92229d785..0715ea5bf13ed9df75eeb4f50e99ada8cef631e9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,19 +42,15 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; - if (dev->phydev) { + if (dev->phydev) phy_ethtool_get_wol(dev->phydev, wol); - if (wol->supported) - return; - } - if (!device_can_wakeup(kdev)) { - wol->supported = 0; - wol->wolopts = 0; + /* MAC is not wake-up capable, return what the PHY does */ + if (!device_can_wakeup(kdev)) return; - } - wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; + /* Overlay MAC capabilities with that of the PHY queried before */ + wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; memset(wol->sopass, 0, sizeof(wol->sopass)); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 11665be3a22c7a352578d608ee093a3f2aea7b95..dcd3f54ed0cf009ea0813c00ed2a6faf4a5f2399 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5250,8 +5250,8 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wol & MACB_WOL_ENABLED) { /* Check for IP address in WOL ARP mode */ idev = __in_dev_get_rcu(bp->dev); - if (idev && idev->ifa_list) - ifa = rcu_access_pointer(idev->ifa_list); + if (idev) + ifa = rcu_dereference(idev->ifa_list); if ((bp->wolopts & WAKE_ARP) && !ifa) { netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index a40c266c37f207c1966375058b8117c81f2629fe..608cc6af5af1c7b01e68babfa190f453b7b1cdcc 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1054,18 +1054,12 @@ static int phy_interface_mode(u8 lmac_type) static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) { - struct lmac *lmac, **priv; + struct lmac *lmac; u64 cfg; lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - lmac->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); - if (!lmac->netdev) - return -ENOMEM; - priv = netdev_priv(lmac->netdev); - *priv = lmac; - if ((lmac->lmac_type == BGX_MODE_SGMII) || (lmac->lmac_type == BGX_MODE_QSGMII) || (lmac->lmac_type == BGX_MODE_RGMII)) { @@ -1191,7 +1185,6 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); - free_netdev(lmac->netdev); lmac->phydev = NULL; } @@ -1653,6 +1646,23 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_get_qlm_mode(bgx); + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + struct lmac *lmacp, **priv; + + lmacp = &bgx->lmac[lmac]; + lmacp->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); + + if (!lmacp->netdev) { + for (int i = 0; i < lmac; i++) + free_netdev(bgx->lmac[i].netdev); + err = -ENOMEM; + goto err_enable; + } + + priv = netdev_priv(lmacp->netdev); + *priv = lmacp; + } + err = bgx_init_phy(bgx); if (err) goto err_enable; @@ -1692,8 +1702,10 @@ static void bgx_remove(struct pci_dev *pdev) u8 lmac; /* Disable all LMACs */ - for (lmac = 0; lmac < bgx->lmac_count; lmac++) + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { bgx_lmac_disable(bgx, lmac); + free_netdev(bgx->lmac[lmac].netdev); + } pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index e32f6724f5681627dde69b6502583f8aeb02ca3a..2e4f3e1782a2520c45912b414fd340db298b1fe7 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -775,6 +775,9 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); hrtimer_cancel(&fep->perout_timer); if (fep->ptp_clock) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3480ff5c7ed6a7b8828c559e055a6154dd15cc7e..5a8b490ab3ad0861955dd4a440022104dfd18b62 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -495,7 +495,7 @@ static int gve_set_channels(struct net_device *netdev, return -EINVAL; } - if (!netif_carrier_ok(netdev)) { + if (!netif_running(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; return 0; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9744b426940e46c75da826c97b2afa44fe2b1ee0..661566db68c86018c0b46b4427d41e819db09b4f 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1566,7 +1566,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1847,7 +1847,7 @@ int gve_adjust_queues(struct gve_priv *priv, rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; - if (netif_carrier_ok(priv->dev)) { + if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2064,7 +2064,7 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { + if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) goto revert_features; @@ -2359,7 +2359,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2700,7 +2700,7 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2718,7 +2718,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a5fc0209d628eb921e6cbe0504309529c50d58a3..4cbc4d069a1f369eaec2fec6b3412a0fce186782 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5724,6 +5724,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + hns3_nic_net_stop(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index e132c2f09560984139c8089635272099cfb290d9..cc7f46c0b35ff64582098a2ef74e227b18a5c36b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1598,8 +1598,7 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) { u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; struct hclge_mod_reg_common_msg msg; - u8 i, j, num; - u32 loop_time; + u8 i, j, num, loop_time; num = ARRAY_SIZE(hclge_ssu_reg_common_msg); for (i = 0; i < num; i++) { @@ -1609,7 +1608,8 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) loop_time = 1; loop_para[0] = 0; if (msg.need_para) { - loop_time = hdev->ae_dev->dev_specs.tnl_num; + loop_time = min(hdev->ae_dev->dev_specs.tnl_num, + HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE); for (j = 0; j < loop_time; j++) loop_para[j] = j + 1; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 82574ce0194fbaceab0b78b4159ff68b6cc4ae29..6c33195a1168f8b6af468b0e05d36cc5a9b3dde9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2653,8 +2653,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + + if (ret) + return ret; - return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + hdev->hw.mac.req_speed = speed; + hdev->hw.mac.req_duplex = duplex; + + return 0; } static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) @@ -2956,17 +2965,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) hdev->hw.mac.duplex = HCLGE_MAC_FULL; - ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, - hdev->hw.mac.duplex, hdev->hw.mac.lane_num); - if (ret) - return ret; - if (hdev->hw.mac.support_autoneg) { ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); if (ret) return ret; } + if (!hdev->hw.mac.autoneg) { + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed, + hdev->hw.mac.req_duplex, + hdev->hw.mac.lane_num); + if (ret) + return ret; + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { @@ -11444,7 +11456,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_free_irq_vectors(pdev); - pci_release_mem_regions(pdev); + pci_release_regions(pdev); pci_disable_device(pdev); } @@ -11516,8 +11528,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 85fb11de43a12457879f95c27e7ca4b14355c234..80079657afebe0add970be55425deaa87b46e3b4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev) if (ret) netdev_err(netdev, "failed to adjust link.\n"); + hdev->hw.mac.req_speed = (u32)speed; + hdev->hw.mac.req_duplex = (u8)duplex; + ret = hclge_cfg_flowctrl(hdev); if (ret) netdev_err(netdev, "failed to configure flow control.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3735d2fed11f75ea44070700db9c15f928489a24..094a7c7b55921f9a0ddb3637ebe8d5ca9eb46c38 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1747,8 +1747,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 99a75a59078ef3e6c71c46696b694d00fd082ea1..caaa10157909ee8bcd01f525ace63b55184b5090 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -765,18 +765,17 @@ static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, } /** - * ice_xsk_pool - get XSK buffer pool bound to a ring + * ice_rx_xsk_pool - assign XSK buff pool to Rx ring * @ring: Rx ring to use * - * Returns a pointer to xsk_buff_pool structure if there is a buffer pool - * present, NULL otherwise. + * Sets XSK buff pool pointer on Rx ring. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) +static inline void ice_rx_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - return ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** @@ -801,7 +800,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) if (!ring) return; - ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5d396c1a7731482f725561a8eff709ecd3cc793e..1facf179a96fd6a5f11ebfa49d47f9d55756a163 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -536,7 +536,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return err; } - ring->xsk_pool = ice_xsk_pool(ring); + ice_rx_xsk_pool(ring); if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); @@ -597,7 +597,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return 0; } - ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + ok = ice_alloc_rx_bufs_zc(ring, ring->xsk_pool, num_bufs); if (!ok) { u16 pf_q = ring->vsi->rxq_map[ring->q_index]; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8c990c9761329954e01bce0f7d389771f9c66b9d..bc79ba974e4958e171b5f708578fff5e1911a37c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4673,10 +4673,10 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, if (err) return err; - fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) + - fec_corr_low_val; - fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) + - fec_uncorr_low_val; + fec_stats->corrected_blocks.total = (fec_corr_high_val << 16) + + fec_corr_low_val; + fec_stats->uncorrectable_blocks.total = (fec_uncorr_high_val << 16) + + fec_uncorr_low_val; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ec636be4d17d1e9b896380ba0223e4b12298b789..6f97ed471fe9a190cf82f10dfc0ed0f1b3942f3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -559,6 +559,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ @@ -2948,7 +2950,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) { struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_pool) + if (READ_ONCE(rx_ring->xsk_pool)) napi_schedule(&rx_ring->q_vector->napi); } } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e2786cc1328611a4b2dae3b5ba88211d2e7e57bc..ef2e858f49bb0ed12302454177a2640b07ec7161 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1477,6 +1477,10 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; + /* Skip HW writes if reset is in progress */ + if (pf->hw.reset_ongoing) + return; + switch (hw->ptp.phy_model) { case ICE_PHY_E810: /* Do not reconfigure E810 PHY */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8bb743f78fcb4771d852293dfcbbd3b28dbf3726..8d25b69812698285215a422f5b02e2dbe460e48a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -456,7 +456,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - rx_ring->xdp_prog = NULL; + WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; @@ -1521,10 +1521,11 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_tx_ring(tx_ring, q_vector->tx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool); bool wd; - if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring); + if (xsk_pool) + wd = ice_xmit_zc(tx_ring, xsk_pool); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1550,6 +1551,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = budget; ice_for_each_rx_ring(rx_ring, q_vector->rx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool); int cleaned; /* A dedicated path for zero-copy allows making a single @@ -1557,7 +1559,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * ice_clean_rx_irq function and makes the codebase cleaner. */ cleaned = rx_ring->xsk_pool ? - ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) : ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a65955eb23c0bd85adc7f0f8f1a4b39867da8907..240a7bec242be349353be039326f9fe098f5bdbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -52,10 +52,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) { - synchronize_rcu(); + if (ice_is_xdp_ena_vsi(vsi)) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -112,25 +110,29 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, * ice_qvec_cfg_msix - Enable IRQ for given queue vector * @vsi: the VSI that contains queue vector * @q_vector: queue vector + * @qid: queue index */ static void -ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) { u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; + int q, _qid = qid; ice_cfg_itr(hw, q_vector); - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, - q_vector->tx.itr_idx); + for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_cfg_txq_interrupt(vsi, _qid, reg_idx, q_vector->tx.itr_idx); + _qid++; + } + + _qid = qid; - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, - q_vector->rx.itr_idx); + for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_cfg_rxq_interrupt(vsi, _qid, reg_idx, q_vector->rx.itr_idx); + _qid++; + } ice_flush(hw); } @@ -164,6 +166,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; int timeout = 50; + int fail = 0; int err; if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) @@ -180,15 +183,17 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) usleep_range(1000, 2000); } + synchronize_net(); + netif_carrier_off(vsi->netdev); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + ice_qvec_dis_irq(vsi, rx_ring, q_vector); ice_qvec_toggle_napi(vsi, q_vector, false); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; @@ -196,17 +201,15 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; } - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); - if (err) - return err; + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); - return 0; + return fail; } /** @@ -219,40 +222,48 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_q_vector *q_vector; + int fail = 0; + bool link_up; int err; err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; ice_set_ring_xdp(xdp_ring); ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (err) - return err; + if (!fail) + fail = err; q_vector = vsi->rx_rings[q_idx]->q_vector; - ice_qvec_cfg_msix(vsi, q_vector); + ice_qvec_cfg_msix(vsi, q_vector, q_idx); err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); - if (err) - return err; + if (!fail) + fail = err; ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ + synchronize_net(); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } clear_bit(ICE_CFG_BUSY, vsi->state); - return 0; + return fail; } /** @@ -459,6 +470,7 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, /** * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Place the @count of descriptors onto Rx ring. Handle the ring wrap @@ -467,7 +479,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, * * Returns true if all allocations were successful, false if any fail. */ -static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u32 nb_buffs_extra = 0, nb_buffs = 0; union ice_32b_rx_flex_desc *rx_desc; @@ -479,8 +492,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp = ice_xdp_buf(rx_ring, ntu); if (ntu + count >= rx_ring->count) { - nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, - rx_desc, + nb_buffs_extra = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, rx_ring->count - ntu); if (nb_buffs_extra != rx_ring->count - ntu) { ntu += nb_buffs_extra; @@ -493,7 +505,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, 0); } - nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + nb_buffs = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, count); ntu += nb_buffs; if (ntu == rx_ring->count) @@ -509,6 +521,7 @@ exit: /** * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Wrapper for internal allocation routine; figure out how many tail @@ -516,7 +529,8 @@ exit: * * Returns true if all calls to internal alloc routine succeeded */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); u16 leftover, i, tail_bumps; @@ -525,9 +539,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) - if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + if (!__ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, rx_thresh)) return false; - return __ice_alloc_rx_bufs_zc(rx_ring, leftover); + return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover); } /** @@ -596,8 +610,10 @@ out: /** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring + * @xsk_pool: AF_XDP buffer pool pointer */ -static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) +static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; @@ -648,7 +664,7 @@ skip: if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + xsk_tx_completed(xsk_pool, xsk_frames); return completed_frames; } @@ -657,6 +673,7 @@ skip: * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX * @xdp: XDP buffer to xmit * @xdp_ring: XDP ring to produce descriptor onto + * @xsk_pool: AF_XDP buffer pool pointer * * note that this function works directly on xdp_buff, no need to convert * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning @@ -666,7 +683,8 @@ skip: * was not enough space on XDP ring */ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, - struct ice_tx_ring *xdp_ring) + struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { struct skb_shared_info *sinfo = NULL; u32 size = xdp->data_end - xdp->data; @@ -680,7 +698,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) - free_space += ice_clean_xdp_irq_zc(xdp_ring); + free_space += ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); if (unlikely(!free_space)) goto busy; @@ -700,7 +718,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, dma_addr_t dma; dma = xsk_buff_xdp_get_dma(xdp); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, size); tx_buf->xdp = xdp; tx_buf->type = ICE_TX_BUF_XSK_TX; @@ -742,12 +760,14 @@ busy: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @xsk_pool: AF_XDP buffer pool pointer * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { int err, result = ICE_XDP_PASS; u32 act; @@ -758,7 +778,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (!err) return ICE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS) result = ICE_XDP_EXIT; else result = ICE_XDP_CONSUMED; @@ -769,7 +789,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: break; case XDP_TX: - result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring, xsk_pool); if (result == ICE_XDP_CONSUMED) goto out_failure; break; @@ -821,14 +841,16 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, /** * ice_clean_rx_irq_zc - consumes packets from the hardware ring * @rx_ring: AF_XDP Rx ring + * @xsk_pool: AF_XDP buffer pool pointer * @budget: NAPI budget * * Returns number of processed packets on success, remaining budget on failure. */ -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; struct xdp_buff *first = NULL; @@ -891,7 +913,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, + xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { xdp_xmit |= xdp_res; } else if (xdp_res == ICE_XDP_EXIT) { @@ -940,7 +963,8 @@ construct_skb: rx_ring->next_to_clean = ntc; entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) - failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); + failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, + entries_to_alloc); ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); @@ -963,17 +987,19 @@ construct_skb: /** * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @desc: AF_XDP descriptor to pull the DMA address and length from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, struct xdp_desc *desc, unsigned int *total_bytes) { struct ice_tx_desc *tx_desc; dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + dma = xsk_buff_raw_get_dma(xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, desc->len); tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -986,10 +1012,13 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, /** * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, unsigned int *total_bytes) { u16 ntu = xdp_ring->next_to_use; @@ -999,8 +1028,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); + dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len); tx_desc = ICE_TX_DESC(xdp_ring, ntu++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -1016,60 +1045,69 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de /** * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @nb_pkts: count of packets to be send * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, - u32 nb_pkts, unsigned int *total_bytes) +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) { u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); leftover = nb_pkts & (PKTS_PER_BATCH - 1); for (i = 0; i < batched; i += PKTS_PER_BATCH) - ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt_batch(xdp_ring, xsk_pool, &descs[i], total_bytes); for (; i < batched + leftover; i++) - ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt(xdp_ring, xsk_pool, &descs[i], total_bytes); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: AF_XDP buffer pool pointer * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool) { - struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct xdp_desc *descs = xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; int budget; - ice_clean_xdp_irq_zc(xdp_ring); + ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); + + if (!netif_carrier_ok(xdp_ring->vsi->netdev) || + !netif_running(xdp_ring->vsi->netdev)) + return true; budget = ICE_DESC_UNUSED(xdp_ring); budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; - ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, descs, nb_processed, + &total_bytes); xdp_ring->next_to_use = 0; } - ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, - &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, &descs[nb_processed], + nb_pkts - nb_processed, &total_bytes); ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); - if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) - xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); return nb_pkts < budget; } @@ -1091,7 +1129,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_VSI_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) @@ -1102,7 +1140,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, ring = vsi->rx_rings[queue_id]->xdp_ring; - if (!ring->xsk_pool) + if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 6fa181f080ef139f94ae1faee9c3442c0e0371b9..45adeb513253a0d12e11251c4070855451a7ed44 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -20,16 +20,20 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); #else -static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring) +static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + struct xsk_buff_pool __always_unused *xsk_pool) { return false; } @@ -44,6 +48,7 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, static inline int ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, int __always_unused budget) { return 0; @@ -51,6 +56,7 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, u16 __always_unused count) { return false; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5dbf2b4ba1b001903d03e88c9b7a603795dac1a7..0b6c8fd5bc90f7ad56768d79fd0dc234ba0b4135 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -900,8 +900,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1335,9 +1335,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1350,45 +1349,43 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1455,10 +1452,10 @@ unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } @@ -1539,7 +1536,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1898,9 +1895,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1932,17 +1926,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2171,7 +2171,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index af2879f03b8df72d94e7538243210a6e60d11656..585c3dadd9bface46192c2e73dc222442b1e09ef 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3576,9 +3576,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->complq); @@ -3593,26 +3591,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) free_cpumask_var(q_vector->affinity_mask); } - /* Clean up the mapping of queues to vectors */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } @@ -3780,13 +3758,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; @@ -3804,8 +3784,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name, - vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); @@ -4326,7 +4306,6 @@ error: */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4340,11 +4319,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 5f92b3c7c3d4a1396437337783647cfc7c654320..511384f3ec5cb5b91a0584b3aaf9370b3d4a48ac 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -404,6 +404,12 @@ #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Retry Buffer Control */ +#define IGC_RETX_CTL 0x041C +#define IGC_RETX_CTL_WATERMARK_MASK 0xF +#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ +#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ + /* Transmit Scheduling Latency */ /* Latency between transmission scheduling (LaunchTime) and the time * the packet is transmitted to the network in nanosecond. diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cb5c7b09e8a009c49e6777424ae434e59b6e26c9..dfd6c00b4205d4ab251bbd994ceccd4ae1cd0f69 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6306,21 +6306,6 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - break; - case TAPRIO_CMD_DESTROY: - return igc_tsn_clear_schedule(adapter); - case TAPRIO_CMD_STATS: - igc_taprio_stats(adapter->netdev, &qopt->stats); - return 0; - case TAPRIO_CMD_QUEUE_STATS: - igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); - return 0; - default: - return -EOPNOTSUPP; - } - if (qopt->base_time < 0) return -ERANGE; @@ -6330,12 +6315,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; + igc_ptp_read(adapter, &now); + + if (igc_tsn_is_taprio_activated_by_user(adapter) && + is_base_time_past(qopt->base_time, &now)) + adapter->qbv_config_change_errors++; + adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; adapter->taprio_offload_enable = true; - igc_ptp_read(adapter, &now); - for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6429,7 +6418,23 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, if (hw->mac.type != igc_i225) return -EOPNOTSUPP; - err = igc_save_qbv_schedule(adapter, qopt); + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + err = igc_save_qbv_schedule(adapter, qopt); + break; + case TAPRIO_CMD_DESTROY: + err = igc_tsn_clear_schedule(adapter); + break; + case TAPRIO_CMD_STATS: + igc_taprio_stats(adapter->netdev, &qopt->stats); + return 0; + case TAPRIO_CMD_QUEUE_STATS: + igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); + return 0; + default: + return -EOPNOTSUPP; + } + if (err) return err; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 22cefb1eeedfa71651442d9da319778fa3b2101c..d68fa7f3d5f0780655e18348002ca35c0272628b 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -49,12 +49,19 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) return new_flags; } +static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); +} + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u16 txoffset; - if (!is_any_launchtime(adapter)) + if (!igc_tsn_is_tx_mode_in_tsn(adapter)) return; switch (adapter->link_speed) { @@ -78,6 +85,23 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) wr32(IGC_GTXOFFSET, txoffset); } +static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl; + + retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; + wr32(IGC_RETX_CTL, retxctl); +} + +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + adapter->taprio_offload_enable; +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ @@ -91,6 +115,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + if (igc_is_device_id_i226(hw)) + igc_tsn_restore_retx_default(adapter); + tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); @@ -111,6 +138,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) return 0; } +/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes + * to 88 Bytes by setting RETX_CTL register using the recommendation from: + * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 + * Item 9: TSN: Packet Transmission Might Cross the Qbv Window + * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + */ +static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl, watermark; + + retxctl = rd32(IGC_RETX_CTL); + watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; + /* Set QBVFULLTH value using watermark and set QBVFULLEN */ + retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | + IGC_RETX_CTL_QBVFULLEN; + wr32(IGC_RETX_CTL, retxctl); +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -123,6 +169,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + if (igc_is_device_id_i226(hw)) + igc_tsn_set_retx_qbvfullthreshold(adapter); + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; u32 txqctl = 0; @@ -262,14 +311,6 @@ skip_cbs: s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); - - /* Increase the counter if scheduling into the past while - * Gate Control List (GCL) is running. - */ - if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - (adapter->qbv_count > 1)) - adapter->qbv_config_change_errors++; } else { if (igc_is_device_id_i226(hw)) { ktime_t adjust_time, expires_time; @@ -331,15 +372,22 @@ int igc_tsn_reset(struct igc_adapter *adapter) return err; } -int igc_tsn_offload_apply(struct igc_adapter *adapter) +static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) { - struct igc_hw *hw = &adapter->hw; + bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & + IGC_FLAG_TSN_ANY_ENABLED); - /* Per I225/6 HW Design Section 7.5.2.1, transmit mode - * cannot be changed dynamically. Require reset the adapter. + return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || + (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); +} + +int igc_tsn_offload_apply(struct igc_adapter *adapter) +{ + /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change + * from legacy->tsn or tsn->legacy, then reset adapter is needed. */ if (netif_running(adapter->netdev) && - (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { + igc_tsn_will_tx_mode_change(adapter)) { schedule_work(&adapter->reset_task); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index b53e6af560b73857829d0361356f83aee9bbd57f..98ec845a86bf00935089bd7c5c7a3a7422facf05 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -7,5 +7,6 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b06e245629739f6f16005a51e181ce057d3e8857..d8be0e4dcb072b8cf8e8d26a22173829de3d40c8 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -946,15 +946,13 @@ jme_udpsum(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IP)) return csum; skb_set_network_header(skb, ETH_HLEN); - if ((ip_hdr(skb)->protocol != IPPROTO_UDP) || - (skb->len < (ETH_HLEN + - (ip_hdr(skb)->ihl << 2) + - sizeof(struct udphdr)))) { + + if (ip_hdr(skb)->protocol != IPPROTO_UDP || + skb->len < (ETH_HLEN + ip_hdrlen(skb) + sizeof(struct udphdr))) { skb_reset_network_header(skb); return csum; } - skb_set_transport_header(skb, - ETH_HLEN + (ip_hdr(skb)->ihl << 2)); + skb_set_transport_header(skb, ETH_HLEN + ip_hdrlen(skb)); csum = udp_hdr(skb)->check; skb_reset_transport_header(skb); skb_reset_network_header(skb); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8c45ad983abc0c4b94a731a948dd6f56f48941ef..0d62a33afa80caf687e9175e4ed0d90ce054dc87 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -953,13 +953,13 @@ static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, static void mvpp2_bm_pool_update_priv_fc(struct mvpp2 *priv, bool en) { struct mvpp2_port *port; - int i; + int i, j; for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; if (port->priv->percpu_pools) { - for (i = 0; i < port->nrxqs; i++) - mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], + for (j = 0; j < port->nrxqs; j++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[j], port->tx_fc & en); } else { mvpp2_bm_pool_update_fc(port, port->pool_long, port->tx_fc & en); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 61334a71058c7594a61ca768ce041f92ab238d24..e212a4ba92751fc807e6aea5d9d187f008eb30e5 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -2666,14 +2666,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri { struct mtk_wed_flow_block_priv *priv = cb_priv; struct flow_cls_offload *cls = type_data; - struct mtk_wed_hw *hw = priv->hw; + struct mtk_wed_hw *hw = NULL; - if (!tc_can_offload(priv->dev)) + if (!priv || !tc_can_offload(priv->dev)) return -EOPNOTSUPP; if (type != TC_SETUP_CLSFLOWER) return -EOPNOTSUPP; + hw = priv->hw; return mtk_flow_offload_cmd(hw->eth, cls, hw->index); } @@ -2729,6 +2730,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev, flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); kfree(block_cb->cb_priv); + block_cb->cb_priv = NULL; } return 0; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5fd82c67b6ab7f9e827c936408d66cc93c5db51e..bb5da42edc23a07fff50988c5e01e90bd51c30a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -130,7 +130,7 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_DEFAULT_LRO_TIMEOUT 32 -#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 +#define MLX5E_DEFAULT_SHAMPO_TIMEOUT 1024 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 6c9ccccca81e27dc51829ed05d1ca3259a9baa92..64b62ed17b07a7383045779b4bb017f4bc642670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -928,7 +928,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_headers_entry_size, mlx5e_shampo_get_log_hd_entry_size(mdev, params)); MLX5_SET(rqc, rqc, reservation_timeout, - params->packet_merge.timeout); + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); MLX5_SET(rqc, rqc, shampo_match_criteria_type, params->packet_merge.shampo.match_criteria_type); MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, @@ -1087,6 +1087,20 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, return wqebbs; } +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 749b2ec0436eeadc0f48e2c0e813eeaa8f403ed0..3f8986f9d86291e6744315ddc8d15d0568e008a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -108,6 +108,7 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 22918b2ef7f128849be838063819ed12509abb45..09433b91be176fb639a2b54cf3fb9595621c168b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -146,7 +146,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } + mutex_lock(&priv->state_lock); err = mlx5e_safe_reopen_channels(priv); + mutex_unlock(&priv->state_lock); if (!err) { to_ctx->status = 1; /* all channels recovered */ return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 8cf8ba2622f2bfaa03b41cc9af0357bf8050e71e..71a168746ebe21d496612cf7f239671cd0d4cf20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -932,6 +932,7 @@ err_rule: mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: + *attr = *old_attr; kfree(old_attr); err_attr: kvfree(spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 6e00afe4671b78ae48eb3eaaddf8456eb09ab0e4..797db853de3638d95f19cf550998d6af477174fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -51,9 +51,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; - if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || - MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + if (IS_ENABLED(CONFIG_MLX5_CLS_ACT) && + ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))) caps |= MLX5_IPSEC_CAP_PRIO; if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 00d5661dc62e1a60388ce7077fa44e9ce5481678..36845872ae94e7e01e325546b9b6b192c99e3f0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1409,7 +1409,12 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + err = mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + if (err) { + netdev_err(priv->netdev, "%s: failed to set ptys reg: %d\n", __func__, err); + goto out; + } + mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3eccdadc0357810e18d93ba4d1104b99b1ba965d..773624bb2c5d549f30157ff158be7e909384585c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, if (num_tuples <= 0) { netdev_warn(priv->netdev, "%s: flow is not valid %d\n", __func__, num_tuples); - return num_tuples; + return num_tuples < 0 ? num_tuples : -EINVAL; } eth_ft = get_flow_table(priv, fs, num_tuples); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6f686fabed4462801ae25200865848323893aa4c..5df904639b0ce6295010c8c94bb82be6a31a339c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5167,18 +5167,6 @@ const struct net_device_ops mlx5e_netdev_ops = { #endif }; -static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) -{ - int i; - - /* The supported periods are organized in ascending order */ - for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) - if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) - break; - - return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { struct mlx5e_params *params = &priv->channels.params; @@ -5308,7 +5296,7 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, struct mlx5e_rq_stats *rq_stats; ASSERT_RTNL(); - if (mlx5e_is_uplink_rep(priv)) + if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) return; channel_stats = priv->channel_stats[i]; @@ -5328,6 +5316,9 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, struct mlx5e_sq_stats *sq_stats; ASSERT_RTNL(); + if (!priv->stats_nch) + return; + /* no special case needed for ptp htb etc since txq2sq_stats is kept up * to date for active sq_stats, otherwise get_base_stats takes care of * inactive sqs. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 979c49ae6b5cc987eeac168adfc03f2278a0d91e..b43ca0b762c306112e8ead14a3e7cda8dcdf0738 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -207,6 +207,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct devlink *devlink = priv_to_devlink(dev); /* if this is the driver that initiated the fw reset, devlink completed the reload */ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { @@ -218,9 +219,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else mlx5_load_one(dev, true); - devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + devl_lock(devlink); + devlink_remote_reload_actions_performed(devlink, 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + devl_unlock(devlink); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index f7b01b3f0cba360449533c03c148e2bbc3d53e0d..1477db7f5307e095ca1025417598c444215893d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -48,6 +48,7 @@ static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { struct irq_affinity_desc auto_desc = {}; + struct mlx5_irq *irq; u32 irq_index; int err; @@ -64,9 +65,12 @@ irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_de else cpu_get(pool, cpumask_first(&af_desc->mask)); } - return mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, - NULL); + irq = mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); + if (IS_ERR(irq)) + xa_erase(&pool->irqs, irq_index); + return irq; } /* Looking for the IRQ with the smallest refcount that fits req_mask. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d0871c46b8c54376154cdf36e90c13636b7582ef..cf8045b926892e2d9d1a1e01463539243a8d6ea6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1538,7 +1538,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + if (ldev->pf[i].netdev == slave) { port = i; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f6deb5a3f82024e528436af8fd66d238518a6588..eeb0b7ea05f1260b4177e45d38e76da367897307 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -126,7 +126,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) } static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, - u8 *host_buses, u8 *sd_group) + u8 *host_buses) { u32 out[MLX5_ST_SZ_DW(mpir_reg)]; int err; @@ -135,10 +135,6 @@ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, if (err) return err; - err = mlx5_query_nic_vport_sd_group(dev, sd_group); - if (err) - return err; - *sdm = MLX5_GET(mpir_reg, out, sdm); *host_buses = MLX5_GET(mpir_reg, out, host_buses); @@ -166,19 +162,23 @@ static int sd_init(struct mlx5_core_dev *dev) if (mlx5_core_is_ecpf(dev)) return 0; + err = mlx5_query_nic_vport_sd_group(dev, &sd_group); + if (err) + return err; + + if (!sd_group) + return 0; + if (!MLX5_CAP_MCAM_REG(dev, mpir)) return 0; - err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group); + err = mlx5_query_sd(dev, &sdm, &host_buses); if (err) return err; if (!sdm) return 0; - if (!sd_group) - return 0; - group_id = mlx5_sd_group_id(dev, sd_group); if (!mlx5_sd_is_supported(dev, host_buses)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 527da58c79535b5895d5c2045409279174c2d506..5b7e6f4b5c7ea10efd4b47eb621726482e83bb72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2142,7 +2142,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) /* Panic tear down fw command will stop the PCI bus communication * with the HCA, so the health poll is no longer needed. */ - mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_fast_teardown_hca(dev); @@ -2177,6 +2176,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index b2986175d9afe80abb0e109fcfe56590f14b0cf7..b706f1486504a7c90049d9071c6541a7016067de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) struct mlx5_core_dev *mdev = sf_dev->mdev; set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state); + mlx5_drain_health_wq(mdev); mlx5_unload_one(mdev, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 042ca0349124332ab875d11b9c8112b9b1173de4..d1db04baa1fa6f01a297879dd383aa42ace22a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -7,7 +7,7 @@ /* don't try to optimize STE allocation if the stack is too constaraining */ #define DR_RULE_MAX_STES_OPTIMIZED 0 #else -#define DR_RULE_MAX_STES_OPTIMIZED 5 +#define DR_RULE_MAX_STES_OPTIMIZED 2 #endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index bc94e75a7aebd1736496fef7fc7ade7302ab1388..e7777700ee18a7e16d1bdcf9a61747e4e5b7f839 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -40,6 +40,7 @@ */ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 +#define MLXBF_GIGE_MAX_FILTER_IDX 3 /* Define for broadcast MAC literal */ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF @@ -175,6 +176,13 @@ enum mlxbf_gige_res { int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv); void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); + +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 dmac); void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index b157f0f1c5a8864e950d148b3b35ce9d47584b24..385a56ac73481ad28b5873b262241dcd0068241d 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev) if (err) goto napi_deinit; + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); + mlxbf_gige_enable_multicast_rx(priv); + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) void __iomem *plu_base; void __iomem *base; int addr, phy_irq; + unsigned int i; int err; base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); @@ -423,6 +428,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++) + mlxbf_gige_disable_mac_rx_filter(priv, i); + mlxbf_gige_disable_multicast_rx(priv); + mlxbf_gige_disable_promisc(priv); + /* Write initial MAC address to hardware */ mlxbf_gige_initial_mac(priv); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 98a8681c21b9ca8dedf0884435608de55b5feda7..4d14cb13fd64e1fe6970c6dcd6d747a36a1275cc 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -62,6 +62,8 @@ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 +#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL 0x0530 +#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index 6999843584934808dfc376c22e59274c0b0f15b8..eb62620b63c7fc6fcae084444873efcb666b304f 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -11,15 +11,31 @@ #include "mlxbf_gige.h" #include "mlxbf_gige_regs.h" -void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, - unsigned int index, u64 dmac) +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv) { void __iomem *base = priv->base; - u64 control; + u64 data; - /* Write destination MAC to specified MAC RX filter */ - writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + - (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 data; + + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; /* Enable MAC receive filter mask for specified index */ control = readq(base + MLXBF_GIGE_CONTROL); @@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, writeq(control, base + MLXBF_GIGE_CONTROL); } +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; + + /* Disable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); + control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); + writeq(control, base + MLXBF_GIGE_CONTROL); +} + +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac) +{ + void __iomem *base = priv->base; + + /* Write destination MAC to specified MAC RX filter */ + writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); +} + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 *dmac) { diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 86034ea4ba5b6128284787833243603fc5587973..c002ede3640202bb4cb2d0628d6b94701b6a9bd2 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_META config FBNIC tristate "Meta Platforms Host Network Interface" depends on X86_64 || COMPILE_TEST - depends on S390=n + depends on !S390 depends on MAX_SKB_FRAGS < 22 depends on PCI_MSI select PHYLINK diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d2f07e179e86ba5f70e2a844093482fb899f6267..39f56973746d7f70bc5f666f2e4678398fa5cb8b 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -599,7 +599,11 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } @@ -1788,7 +1792,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1799,16 +1802,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 714d2e80469404ef0e950998a8cbce85449fa8f2..3507c2e28110d23b896c41044708e605130a2f0f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4349,7 +4349,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - goto err_stop_0; + netif_stop_queue(dev); + return NETDEV_TX_BUSY; } opts[1] = rtl8169_tx_vlan_tag(skb); @@ -4405,11 +4406,6 @@ err_dma_0: dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; - -err_stop_0: - netif_stop_queue(dev); - dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; } static unsigned int rtl_last_frag_len(struct sk_buff *skb) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d3c5306f1c41f7aca6f6a9170f7946a3abfe6cd0..93a78fd0737b6c93c2bef9ed7ad87de50dcac52d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -573,8 +573,6 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) #define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) #define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKMOD */ -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK 0x1 /* LNKSPEED */ #define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 #define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f98741d2607ec45b6558d3da817c3e10792386c8..31c387cc5f269dd604d1d810137c80ac53b0d63e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -786,7 +786,7 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) else x->pcs_speed = SPEED_10; - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK); + x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, x->pcs_duplex ? "Full" : "Half"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 12689774d75585db08ba38495e98913f499136e1..f3a1b179aaeaca2189974fce4724db0a175b5de3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2912,7 +2912,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) u32 channels_to_check = tx_channel_count > rx_channel_count ? tx_channel_count : rx_channel_count; u32 chan; - int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; + int status[MAX_T(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; /* Make sure we never check beyond our status buffer. */ if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index fa5500decc9602c38ccf930cb6983a401f5b9c1f..c7d9221fafdcb1af0fc9d483177043f936fed626 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -160,16 +160,16 @@ #define XAE_RCW1_OFFSET 0x00000404 /* Rx Configuration Word 1 */ #define XAE_TC_OFFSET 0x00000408 /* Tx Configuration */ #define XAE_FCC_OFFSET 0x0000040C /* Flow Control Configuration */ -#define XAE_EMMC_OFFSET 0x00000410 /* EMAC mode configuration */ -#define XAE_PHYC_OFFSET 0x00000414 /* RGMII/SGMII configuration */ +#define XAE_EMMC_OFFSET 0x00000410 /* MAC speed configuration */ +#define XAE_PHYC_OFFSET 0x00000414 /* RX Max Frame Configuration */ #define XAE_ID_OFFSET 0x000004F8 /* Identification register */ -#define XAE_MDIO_MC_OFFSET 0x00000500 /* MII Management Config */ -#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MII Management Control */ -#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ -#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ +#define XAE_MDIO_MC_OFFSET 0x00000500 /* MDIO Setup */ +#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MDIO Control */ +#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MDIO Write Data */ +#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MDIO Read Data */ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ -#define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ +#define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ @@ -308,7 +308,7 @@ */ #define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF -/* Bit masks for Axi Ethernet FMI register */ +/* Bit masks for Axi Ethernet FMC register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ #define XAE_FMI_IND_MASK 0x00000003 /* Index Mask */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index e342f387c3dd643cb2cb173524484cf2004547ae..02fdf66e07faa33887f361c699ded2360b9cae5d 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2219,9 +2219,9 @@ static void axienet_dma_err_handler(struct work_struct *work) ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); - axienet_setoptions(ndev, lp->options); napi_enable(&lp->napi_rx); napi_enable(&lp->napi_tx); + axienet_setoptions(ndev, lp->options); } /** diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b3ddc9a629d9d95463977b7ae50e52683257c95a..fad5b65644645ded60601d4a15ef606ef19b3a13 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -14,9 +14,7 @@ #include "fjes.h" #include "fjes_trace.h" -#define MAJ 1 -#define MIN 2 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) +#define DRV_VERSION "1.2" #define DRV_NAME "fjes" char fjes_driver_name[] = DRV_NAME; char fjes_driver_version[] = DRV_VERSION; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 427b91aca50d3a370e4ad00071f0c477fefa5076..0696faf60013e0669ceb4aa39dfbac17ad741540 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1269,6 +1269,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + skb_reset_inner_headers(skb); /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index d12e353742318ef8f2951947ac8e19c21220d243..e982e9ce44a5961005495482e0d7146c625c2e91 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -653,13 +653,7 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) unsigned long *possible = phydev->possible_interfaces; unsigned int serdes_mode, rate_adapt; phy_interface_t interface; - int i, val, ret; - - ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_CFG_10M, val, val != 0, - 1000, 100000, false); - if (ret) - return ret; + int i, val; /* Walk the media-speed configuration registers to determine which * host-side serdes modes may be used by the PHY depending on the @@ -708,6 +702,25 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) return 0; } +static int aqr113c_fill_interface_modes(struct phy_device *phydev) +{ + int val, ret; + + /* It's been observed on some models that - when coming out of suspend + * - the FW signals that the PHY is ready but the GLOBAL_CFG registers + * continue on returning zeroes for some time. Let's poll the 100M + * register until it returns a real value as both 113c and 115c support + * this mode. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_CFG_100M, val, val != 0, + 1000, 100000, false); + if (ret) + return ret; + + return aqr107_fill_interface_modes(phydev); +} + static int aqr113c_config_init(struct phy_device *phydev) { int ret; @@ -725,7 +738,7 @@ static int aqr113c_config_init(struct phy_device *phydev) if (ret) return ret; - return aqr107_fill_interface_modes(phydev); + return aqr113c_fill_interface_modes(phydev); } static int aqr107_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index dd519805deee318270758c5955859d570d355977..65b0a3115e14cd99a44b9fefe015c1b6d829d4ac 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1389,6 +1389,8 @@ static int ksz9131_config_init(struct phy_device *phydev) const struct device *dev_walker; int ret; + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + dev_walker = &phydev->mdio.dev; do { of_node = dev_walker->of_node; @@ -1438,28 +1440,30 @@ static int ksz9131_config_init(struct phy_device *phydev) #define MII_KSZ9131_AUTO_MDIX 0x1C #define MII_KSZ9131_AUTO_MDI_SET BIT(7) #define MII_KSZ9131_AUTO_MDIX_SWAP_OFF BIT(6) +#define MII_KSZ9131_DIG_AXAN_STS 0x14 +#define MII_KSZ9131_DIG_AXAN_STS_LINK_DET BIT(14) +#define MII_KSZ9131_DIG_AXAN_STS_A_SELECT BIT(12) static int ksz9131_mdix_update(struct phy_device *phydev) { int ret; - ret = phy_read(phydev, MII_KSZ9131_AUTO_MDIX); - if (ret < 0) - return ret; - - if (ret & MII_KSZ9131_AUTO_MDIX_SWAP_OFF) { - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix_ctrl = ETH_TP_MDI; - else - phydev->mdix_ctrl = ETH_TP_MDI_X; + if (phydev->mdix_ctrl != ETH_TP_MDI_AUTO) { + phydev->mdix = phydev->mdix_ctrl; } else { - phydev->mdix_ctrl = ETH_TP_MDI_AUTO; - } + ret = phy_read(phydev, MII_KSZ9131_DIG_AXAN_STS); + if (ret < 0) + return ret; - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix = ETH_TP_MDI; - else - phydev->mdix = ETH_TP_MDI_X; + if (ret & MII_KSZ9131_DIG_AXAN_STS_LINK_DET) { + if (ret & MII_KSZ9131_DIG_AXAN_STS_A_SELECT) + phydev->mdix = ETH_TP_MDI; + else + phydev->mdix = ETH_TP_MDI_X; + } else { + phydev->mdix = ETH_TP_MDI_INVALID; + } + } return 0; } diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index bed839237fb55b506a693bd4af4d636afafbbe15..87865918dab6d0713dc5802b656a4b922e411df5 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1465,6 +1465,13 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001cc960), + .name = "RTL8366S Gigabit Ethernet", + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, }; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 897b979ec03c81593505d24fb4a51e502ddb5e83..3b5fcaf0dd36db1667e77428a4fbe912cdd8ef82 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -237,16 +237,6 @@ static int vsc739x_config_init(struct phy_device *phydev) return 0; } -static int vsc73xx_config_aneg(struct phy_device *phydev) -{ - /* The VSC73xx switches does not like to be instructed to - * do autonegotiation in any way, it prefers that you just go - * with the power-on/reset defaults. Writing some registers will - * just make autonegotiation permanently fail. - */ - return 0; -} - /* This adds a skew for both TX and RX clocks, so the skew should only be * applied to "rgmii-id" interfaces. It may not work as expected * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. @@ -444,7 +434,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -453,7 +442,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -462,7 +450,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -471,7 +458,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index ec20953e0f825cb7fa99e6c91258d715cf30019d..4f032b16a8a0a6ce33911179e60764136c1b0cef 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -401,9 +401,14 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, rdesc->ops = &pse_pi_ops; rdesc->owner = pcdev->owner; - rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS | - REGULATOR_CHANGE_CURRENT; - rinit_data->constraints.max_uA = MAX_PI_CURRENT; + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + + if (pcdev->ops->pi_set_current_limit) { + rinit_data->constraints.valid_ops_mask |= + REGULATOR_CHANGE_CURRENT; + rinit_data->constraints.max_uA = MAX_PI_CURRENT; + } + rinit_data->supply_regulator = "vpwr"; rconfig.dev = pcdev->dev; diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index 61f6ad9c1934e1dc17447886728492aa1a1f509e..2ea75686a3190730aed82045ef113db3abafdca0 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 Bootlin, Kory Maincent */ +#include #include #include #include @@ -29,6 +30,8 @@ #define TPS23881_REG_TPON BIT(0) #define TPS23881_REG_FWREV 0x41 #define TPS23881_REG_DEVID 0x43 +#define TPS23881_REG_DEVID_MASK 0xF0 +#define TPS23881_DEVICE_ID 0x02 #define TPS23881_REG_SRAM_CTRL 0x60 #define TPS23881_REG_SRAM_DATA 0x61 @@ -750,7 +753,7 @@ static int tps23881_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; - if (ret != 0x22) { + if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { dev_err(dev, "Wrong device ID\n"); return -ENXIO; } diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 687d70cfc5563526d3e51d046bcc7a1bf3f00acc..46afb95ffabe3b0075fdf9782af9803610d43016 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -286,10 +286,11 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } - if (urb->actual_length <= IPHETH_IP_ALIGN) { - dev->net->stats.rx_length_errors++; - return; - } + /* iPhone may periodically send URBs with no payload + * on the "bulk in" endpoint. It is safe to ignore them. + */ + if (urb->actual_length == 0) + goto rx_submit; /* RX URBs starting with 0x00 0x01 do not encapsulate Ethernet frames, * but rather are control frames. Their purpose is not documented, and @@ -298,7 +299,8 @@ static void ipheth_rcvbulk_callback(struct urb *urb) * URB received from the bulk IN endpoint. */ if (unlikely - (((char *)urb->transfer_buffer)[0] == 0 && + (urb->actual_length == 4 && + ((char *)urb->transfer_buffer)[0] == 0 && ((char *)urb->transfer_buffer)[1] == 1)) goto rx_submit; @@ -306,7 +308,6 @@ static void ipheth_rcvbulk_callback(struct urb *urb) if (retval != 0) { dev_err(&dev->intf->dev, "%s: callback retval: %d\n", __func__, retval); - return; } rx_submit: @@ -354,13 +355,14 @@ static int ipheth_carrier_set(struct ipheth_device *dev) 0x02, /* index */ dev->ctrl_buf, IPHETH_CTRL_BUF_SIZE, IPHETH_CTRL_TIMEOUT); - if (retval < 0) { + if (retval <= 0) { dev_err(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); return retval; } - if (dev->ctrl_buf[0] == IPHETH_CARRIER_ON) { + if ((retval == 1 && dev->ctrl_buf[0] == IPHETH_CARRIER_ON) || + (retval >= 2 && dev->ctrl_buf[1] == IPHETH_CARRIER_ON)) { netif_carrier_on(dev->net); if (dev->tx_urb->status != -EINPROGRESS) netif_wake_queue(dev->net); @@ -475,8 +477,8 @@ static int ipheth_close(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); - cancel_delayed_work_sync(&dev->carrier_work); netif_stop_queue(net); + cancel_delayed_work_sync(&dev->carrier_work); return 0; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 386d62769dedb1411150597b5ddc4d951e91b666..4823dbdf54656f08c6c590ee6641a932d43ef0c2 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -201,6 +201,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; default: /* not ip - do not know what to do */ + kfree_skb(skbn); goto skip; } @@ -1431,6 +1432,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ + {QMI_FIXED_INTF(0x2dee, 0x4d22, 5)}, /* MeiG Smart SRM825L */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 0a662e42ed96593cf20d4209af2bc64a74f307df..cb7d2f798fb43622c7e954aeebeab8edede4cb49 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -179,6 +179,7 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; int rc = 0; + int err; if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); @@ -189,11 +190,17 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) if (loc == MII_BMSR) { u8 value; - sr_read_reg(dev, SR_NSR, &value); + err = sr_read_reg(dev, SR_NSR, &value); + if (err < 0) + return err; + if (value & NSR_LINKST) rc = 1; } - sr_share_read_word(dev, 1, loc, &res); + err = sr_share_read_word(dev, 1, loc, &res); + if (err < 0) + return err; + if (rc == 1) res = le16_to_cpu(res) | BMSR_LSTATUS; else diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0383a3e136d6a07f57d71dbdd5a8ae09ac792a4a..3f10c72743e94d6f356fc3c17aae79b02d14e00f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3658,6 +3658,9 @@ static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) @@ -3675,6 +3678,9 @@ static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) @@ -3743,7 +3749,11 @@ static int virtnet_set_ringparam(struct net_device *dev, err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); - if (err) + + /* Don't break the tx resize action if the vq coalescing is not + * supported. The same is true for rx resize below. + */ + if (err && err != -EOPNOTSUPP) return err; } @@ -3758,7 +3768,7 @@ static int virtnet_set_ringparam(struct net_device *dev, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); mutex_unlock(&vi->rq[i].dim_lock); - if (err) + if (err && err != -EOPNOTSUPP) return err; } } diff --git a/drivers/net/wan/fsl_qmc_hdlc.c b/drivers/net/wan/fsl_qmc_hdlc.c index c5e7ca793c43311911e9f7cf0a1c36215a6a641e..8fcfbde31a1c66db1e036cee5151a9b8e58f3c91 100644 --- a/drivers/net/wan/fsl_qmc_hdlc.c +++ b/drivers/net/wan/fsl_qmc_hdlc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,7 @@ struct qmc_hdlc { struct qmc_chan *qmc_chan; struct net_device *netdev; struct framer *framer; - spinlock_t carrier_lock; /* Protect carrier detection */ + struct mutex carrier_lock; /* Protect carrier detection */ struct notifier_block nb; bool is_crc32; spinlock_t tx_lock; /* Protect tx descriptors */ @@ -60,7 +61,7 @@ static int qmc_hdlc_framer_set_carrier(struct qmc_hdlc *qmc_hdlc) if (!qmc_hdlc->framer) return 0; - guard(spinlock_irqsave)(&qmc_hdlc->carrier_lock); + guard(mutex)(&qmc_hdlc->carrier_lock); ret = framer_get_status(qmc_hdlc->framer, &framer_status); if (ret) { @@ -249,6 +250,7 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl struct qmc_hdlc_desc *desc = context; struct net_device *netdev; struct qmc_hdlc *qmc_hdlc; + size_t crc_size; int ret; netdev = desc->netdev; @@ -267,15 +269,26 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl if (flags & QMC_RX_FLAG_HDLC_CRC) /* CRC error */ netdev->stats.rx_crc_errors++; kfree_skb(desc->skb); - } else { - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += length; + goto re_queue; + } - skb_put(desc->skb, length); - desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); - netif_rx(desc->skb); + /* Discard the CRC */ + crc_size = qmc_hdlc->is_crc32 ? 4 : 2; + if (length < crc_size) { + netdev->stats.rx_length_errors++; + kfree_skb(desc->skb); + goto re_queue; } + length -= crc_size; + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + + skb_put(desc->skb, length); + desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); + netif_rx(desc->skb); +re_queue: /* Re-queue a transfer using the same descriptor */ ret = qmc_hdlc_recv_queue(qmc_hdlc, desc, desc->dma_size); if (ret) { @@ -706,7 +719,7 @@ static int qmc_hdlc_probe(struct platform_device *pdev) qmc_hdlc->dev = dev; spin_lock_init(&qmc_hdlc->tx_lock); - spin_lock_init(&qmc_hdlc->carrier_lock); + mutex_init(&qmc_hdlc->carrier_lock); qmc_hdlc->qmc_chan = devm_qmc_chan_get_bychild(dev, dev->of_node); if (IS_ERR(qmc_hdlc->qmc_chan)) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index d08c04343e9007f61c8e11a543ee3c8c011962bd..44406e0b4a342ff11f6c30b5cc3fc4bc89e22fa0 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -162,6 +162,60 @@ static int ath12k_dp_prepare_htt_metadata(struct sk_buff *skb) return 0; } +static void ath12k_dp_tx_move_payload(struct sk_buff *skb, + unsigned long delta, + bool head) +{ + unsigned long len = skb->len; + + if (head) { + skb_push(skb, delta); + memmove(skb->data, skb->data + delta, len); + skb_trim(skb, len); + } else { + skb_put(skb, delta); + memmove(skb->data + delta, skb->data, len); + skb_pull(skb, delta); + } +} + +static int ath12k_dp_tx_align_payload(struct ath12k_base *ab, + struct sk_buff **pskb) +{ + u32 iova_mask = ab->hw_params->iova_mask; + unsigned long offset, delta1, delta2; + struct sk_buff *skb2, *skb = *pskb; + unsigned int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + int ret = 0; + + offset = (unsigned long)skb->data & iova_mask; + delta1 = offset; + delta2 = iova_mask - offset + 1; + + if (headroom >= delta1) { + ath12k_dp_tx_move_payload(skb, delta1, true); + } else if (tailroom >= delta2) { + ath12k_dp_tx_move_payload(skb, delta2, false); + } else { + skb2 = skb_realloc_headroom(skb, iova_mask); + if (!skb2) { + ret = -ENOMEM; + goto out; + } + + dev_kfree_skb_any(skb); + + offset = (unsigned long)skb2->data & iova_mask; + if (offset) + ath12k_dp_tx_move_payload(skb2, offset, true); + *pskb = skb2; + } + +out: + return ret; +} + int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, struct sk_buff *skb) { @@ -184,6 +238,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, bool tcl_ring_retry; bool msdu_ext_desc = false; bool add_htt_metadata = false; + u32 iova_mask = ab->hw_params->iova_mask; if (test_bit(ATH12K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags)) return -ESHUTDOWN; @@ -279,6 +334,23 @@ tcl_ring_sel: goto fail_remove_tx_buf; } + if (iova_mask && + (unsigned long)skb->data & iova_mask) { + ret = ath12k_dp_tx_align_payload(ab, &skb); + if (ret) { + ath12k_warn(ab, "failed to align TX buffer %d\n", ret); + /* don't bail out, give original buffer + * a chance even unaligned. + */ + goto map; + } + + /* hdr is pointing to a wrong place after alignment, + * so refresh it for later use. + */ + hdr = (void *)skb->data; + } +map: ti.paddr = dma_map_single(ab->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(ab->dev, ti.paddr)) { atomic_inc(&ab->soc_stats.tx_err.misc_fail); diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 2e11ea7635740ce289ee7d248e71caf3d6d9c78f..7b0b6a7f4701ab31ebbc5c429f9d1a7de4baaa1a 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -924,6 +924,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, { .name = "wcn7850 hw2.0", @@ -1000,6 +1002,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = &wcn7850_uuid, .supports_dynamic_smps_6ghz = false, + + .iova_mask = ATH12K_PCIE_MAX_PAYLOAD_SIZE - 1, }, { .name = "qcn9274 hw2.0", @@ -1072,6 +1076,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, }; diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index e792eb6b249b41db7aebba59ec2f3fbc9dcb152b..b1d302c48326b8eeb8c9bde4dfe2485dfedb1b01 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -96,6 +96,8 @@ #define ATH12K_M3_FILE "m3.bin" #define ATH12K_REGDB_FILE_NAME "regdb.bin" +#define ATH12K_PCIE_MAX_PAYLOAD_SIZE 128 + enum ath12k_hw_rate_cck { ATH12K_HW_RATE_CCK_LP_11M = 0, ATH12K_HW_RATE_CCK_LP_5_5M, @@ -215,6 +217,8 @@ struct ath12k_hw_params { const guid_t *acpi_guid; bool supports_dynamic_smps_6ghz; + + u32 iova_mask; }; struct ath12k_hw_ops { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8106297f0bc1c786fbb10489d4396cb3e93af08e..ce41c8153080cdd4d3e6b13e3398ddee7e7e4292 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9193,6 +9193,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) hw->vif_data_size = sizeof(struct ath12k_vif); hw->sta_data_size = sizeof(struct ath12k_sta); + hw->extra_tx_headroom = ab->hw_params->iova_mask; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 876c029f58f62c3d9644dc44c80eabae8b084739..9e0b9e329bda32a733f48b54dda389c553a0d60f 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -473,7 +473,8 @@ static void __ath12k_pci_ext_irq_disable(struct ath12k_base *ab) { int i; - clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); + if (!test_and_clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags)) + return; for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath12k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; diff --git a/drivers/net/wireless/ath/ath12k/wow.c b/drivers/net/wireless/ath/ath12k/wow.c index bead19db2c9a21511e3df4cfb916ba943730af23..9b8684abbe40aea8fc93f479ad0700f9a5871289 100644 --- a/drivers/net/wireless/ath/ath12k/wow.c +++ b/drivers/net/wireless/ath/ath12k/wow.c @@ -361,7 +361,7 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, struct ath12k *ar = arvif->ar; unsigned long wow_mask = 0; int pattern_id = 0; - int ret, i; + int ret, i, j; /* Setup requested WOW features */ switch (arvif->vdev_type) { @@ -431,9 +431,9 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, eth_pattern->pattern_len); /* convert bitmask to bytemask */ - for (i = 0; i < eth_pattern->pattern_len; i++) - if (eth_pattern->mask[i / 8] & BIT(i % 8)) - new_pattern.bytemask[i] = 0xff; + for (j = 0; j < eth_pattern->pattern_len; j++) + if (eth_pattern->mask[j / 8] & BIT(j % 8)) + new_pattern.bytemask[j] = 0xff; new_pattern.pattern_len = eth_pattern->pattern_len; new_pattern.pkt_offset = eth_pattern->pkt_offset; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1585a5653ee4b9435fb4490281ecfe0b57395aaa..d4cc5fa92341d5be73fa41a9c5bbbdf60bcdf837 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4320,9 +4320,16 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa, /* Single PMK operation */ pmk_op->count = cpu_to_le16(1); length += sizeof(struct brcmf_pmksa_v3); - memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); - memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); - pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + if (pmksa->bssid) + memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); + if (pmksa->pmkid) { + memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); + pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + } + if (pmksa->ssid && pmksa->ssid_len) { + memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len); + pmk_op->pmk[0].ssid.SSID_len = pmksa->ssid_len; + } pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index b59de4f80b4b895b8ea92232830e84c684d195ec..27a7e0b5b3d51ed2c433435cfd0a17b0a70ce530 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -639,7 +639,8 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, bool cmd_queue); -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr); +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, u8 **hdr, unsigned int hdr_room); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 2e780fb2da426cf44f33e4a34e27d64eb825fa03..b1846abb99b78f4fd69dd5382987408e38cdfa4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -168,6 +168,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; dma_addr_t start_hdr_phys; u16 length, amsdu_pad; u8 *start_hdr; @@ -260,7 +261,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, int ret; tb_len = min_t(unsigned int, tso.size, data_left); - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, + tb_len); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) goto out_err; @@ -272,6 +274,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, goto out_err; data_left -= tb_len; + data_offset += tb_len; tso_build_data(skb, &tso, tb_len); } } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 22d482ae53d97af8bb70238dbe6fb44382a733e4..9fe050f0ddc16002565efce0bf18aecaf771faa9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1814,23 +1814,31 @@ out: /** * iwl_pcie_get_sgt_tb_phys - Find TB address in mapped SG list * @sgt: scatter gather table - * @addr: Virtual address + * @offset: Offset into the mapped memory (i.e. SKB payload data) + * @len: Length of the area * - * Find the entry that includes the address for the given address and return - * correct physical address for the TB entry. + * Find the DMA address that corresponds to the SKB payload data at the + * position given by @offset. * * Returns: Address for TB entry */ -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr) +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len) { struct scatterlist *sg; + unsigned int sg_offset = 0; int i; + /* + * Search the mapped DMA areas in the SG for the area that contains the + * data at offset with the given length. + */ for_each_sgtable_dma_sg(sgt, sg, i) { - if (addr >= sg_virt(sg) && - (u8 *)addr < (u8 *)sg_virt(sg) + sg_dma_len(sg)) - return sg_dma_address(sg) + - ((unsigned long)addr - (unsigned long)sg_virt(sg)); + if (offset >= sg_offset && + offset + len <= sg_offset + sg_dma_len(sg)) + return sg_dma_address(sg) + offset - sg_offset; + + sg_offset += sg_dma_len(sg); } WARN_ON_ONCE(1); @@ -1875,7 +1883,9 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len); + /* Only map the data, not the header (it is copied to the TSO page) */ + sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), + skb->data_len); if (WARN_ON_ONCE(sgt->orig_nents <= 0)) return NULL; @@ -1900,6 +1910,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; u16 length, iv_len, amsdu_pad; dma_addr_t start_hdr_phys; u8 *start_hdr, *pos_hdr; @@ -2000,7 +2011,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, data_left); dma_addr_t tb_phys; - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, size); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) return -EINVAL; @@ -2011,6 +2022,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, tb_phys, size); data_left -= size; + data_offset += size; tso_build_data(skb, &tso, size); } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 2e6268cb06c002a129fbd5ddf552a6267b34de06..23b228804289be7e962704b7754de5d3155cf732 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -303,6 +303,7 @@ mt7921_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) mvif->bss_conf.mt76.omac_idx = mvif->bss_conf.mt76.idx; mvif->phy = phy; + mvif->bss_conf.vif = mvif; mvif->bss_conf.mt76.band_idx = 0; mvif->bss_conf.mt76.wmm_idx = mvif->bss_conf.mt76.idx % MT76_CONNAC_MAX_WMM_SETS; @@ -1182,7 +1183,7 @@ static void mt7921_ipv6_addr_change(struct ieee80211_hw *hw, struct inet6_dev *idev) { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; - struct mt792x_dev *dev = mvif->phy->dev; + struct mt792x_dev *dev = mt792x_hw_dev(hw); struct inet6_ifaddr *ifa; struct in6_addr ns_addrs[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; struct sk_buff *skb; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c index 700c6e2bcad19468e61075fa9b836348de0f637e..ff458fb8514daee51bee4cda46d024d3144355e4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c @@ -181,11 +181,11 @@ static void _rtl92du_init_queue_reserved_page(struct ieee80211_hw *hw, struct rtl_hal *rtlhal = rtl_hal(rtlpriv); u32 txqpagenum, txqpageunit; u32 txqremainingpage; + u32 value32 = 0; u32 numhq = 0; u32 numlq = 0; u32 numnq = 0; u32 numpubq; - u32 value32; if (rtlhal->macphymode != SINGLEMAC_SINGLEPHY) { numpubq = NORMAL_PAGE_NUM_PUBQ_92D_DUAL_MAC; diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 9fe664960b38032d74fb6544b998ab1c92e198fc..e2a6575b9ff7f8d55aaec1f683b93e703844a2dd 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -126,8 +126,6 @@ struct pn544_i2c_fw_secure_blob { #define PN544_FW_CMD_RESULT_COMMAND_REJECTED 0xE0 #define PN544_FW_CMD_RESULT_CHUNK_ERROR 0xE6 -#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) - #define PN544_FW_WRITE_BUFFER_MAX_LEN 0x9f7 #define PN544_FW_I2C_MAX_PAYLOAD PN544_HCI_I2C_LLC_MAX_SIZE #define PN544_FW_I2C_WRITE_FRAME_HEADER_LEN 8 diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1ae8b2351654e7a7315c67025d9c453631a621f7..210fb77f51ba026d943bbb95eefce8abecdd344c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -498,7 +498,7 @@ static int pmem_attach_disk(struct device *dev, } if (fua) lim.features |= BLK_FEAT_FUA; - if (is_nd_pfn(dev)) + if (is_nd_pfn(dev) || pmem_should_map_pages(dev)) lim.features |= BLK_FEAT_DAX; if (!devm_request_mem_region(dev, res->start, resource_size(res), diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 19917253ba7bae13fa74028f537ce472e8abfc42..33fa01c599adda3e03fef6b63f547da846c44609 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -36,6 +36,7 @@ struct nvme_ns_info { struct nvme_ns_ids ids; u32 nsid; __le32 anagrpid; + u8 pi_offset; bool is_shared; bool is_readonly; bool is_ready; @@ -1757,8 +1758,8 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, - struct queue_limits *lim) +static bool nvme_init_integrity(struct nvme_ns_head *head, + struct queue_limits *lim, struct nvme_ns_info *info) { struct blk_integrity *bi = &lim->integrity; @@ -1816,7 +1817,7 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, } bi->tuple_size = head->ms; - bi->pi_offset = head->pi_offset; + bi->pi_offset = info->pi_offset; return true; } @@ -1876,12 +1877,18 @@ static void nvme_configure_pi_elbas(struct nvme_ns_head *head, struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm) { u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]); + u8 guard_type; /* no support for storage tag formats right now */ if (nvme_elbaf_sts(elbaf)) return; - head->guard_type = nvme_elbaf_guard_type(elbaf); + guard_type = nvme_elbaf_guard_type(elbaf); + if ((nvm->pic & NVME_ID_NS_NVM_QPIFS) && + guard_type == NVME_NVM_NS_QTYPE_GUARD) + guard_type = nvme_elbaf_qualified_guard_type(elbaf); + + head->guard_type = guard_type; switch (head->guard_type) { case NVME_NVM_NS_64B_GUARD: head->pi_size = sizeof(struct crc64_pi_tuple); @@ -1896,12 +1903,11 @@ static void nvme_configure_pi_elbas(struct nvme_ns_head *head, static void nvme_configure_metadata(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, struct nvme_id_ns *id, - struct nvme_id_ns_nvm *nvm) + struct nvme_id_ns_nvm *nvm, struct nvme_ns_info *info) { head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); head->pi_type = 0; head->pi_size = 0; - head->pi_offset = 0; head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return; @@ -1916,7 +1922,7 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; if (!(id->dps & NVME_NS_DPS_PI_FIRST)) - head->pi_offset = head->ms - head->pi_size; + info->pi_offset = head->ms - head->pi_size; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -2150,7 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); + nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; @@ -2170,7 +2176,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->disk, ns->head, &lim)) + if (!nvme_init_integrity(ns->head, &lim, info)) capacity = 0; ret = queue_limits_commit_update(ns->disk->queue, &lim); @@ -2274,7 +2280,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; else - nvme_init_integrity(ns->head->disk, ns->head, &lim); + nvme_init_integrity(ns->head, &lim, info); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 44e342a46f39f9d1f7ae985e73ae9289cb134da0..f5f545fa0103547444f5531434b9bdea3e3a521e 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1403,10 +1403,10 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) tok = &opt_tokens[idx]; if (tok->token == NVMF_OPT_ERR) continue; - seq_puts(seq_file, ","); + seq_putc(seq_file, ','); seq_puts(seq_file, tok->pattern); } - seq_puts(seq_file, "\n"); + seq_putc(seq_file, '\n'); } static int nvmf_dev_show(struct seq_file *seq_file, void *private) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f900e44243aef0f910cf31e2c41b1d844e65d145..ae5314d32943e8c9ba43c0d287de411fbd201679 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -462,20 +462,19 @@ struct nvme_ns_head { struct srcu_struct srcu; struct nvme_subsystem *subsys; struct nvme_ns_ids ids; + u8 lba_shift; + u16 ms; + u16 pi_size; + u8 pi_type; + u8 guard_type; struct list_head entry; struct kref ref; bool shared; bool passthru_err_log_enabled; - int instance; struct nvme_effects_log *effects; u64 nuse; unsigned ns_id; - int lba_shift; - u16 ms; - u16 pi_size; - u8 pi_type; - u8 pi_offset; - u8 guard_type; + int instance; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; #endif diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 21f8e1b9801fcf80934119d757aac58516801ce1..6cd9395ba9ec3d8044b4eaf0e6665c7d739d718d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -863,7 +863,8 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) nvme_start_request(req); return BLK_STS_OK; out_unmap_data: - nvme_unmap_data(dev, req); + if (blk_rq_nr_phys_segments(req)) + nvme_unmap_data(dev, req); out_free_cmd: nvme_cleanup_cmd(req); return ret; @@ -1309,7 +1310,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) dev_warn(dev->ctrl.device, "Does your device have a faulty power saving mode enabled?\n"); dev_warn(dev->ctrl.device, - "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n"); + "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off pcie_port_pm=off\" and report a bug\n"); } static enum blk_eh_timer_return nvme_timeout(struct request *req) @@ -2968,6 +2969,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; } + /* + * NVMe SSD drops off the PCIe bus after system idle + * for 10 hours on a Lenovo N60z board. + */ + if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6")) + return NVME_QUIRK_NO_APST; + return 0; } diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 3c55f7edd181939fc8a37b47e0ce56fdb271e434..ba05faaac562dc7465a789a011649e4dc909136b 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -233,13 +233,12 @@ static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, { struct nvme_ns_head *head = dev_to_ns_head(dev); struct gendisk *disk = dev_to_disk(dev); - struct block_device *bdev = disk->part0; int ret; - if (nvme_disk_is_ns_head(bdev->bd_disk)) + if (nvme_disk_is_ns_head(disk)) ret = ns_head_update_nuse(head); else - ret = ns_update_nuse(bdev->bd_disk->private_data); + ret = ns_update_nuse(disk->private_data); if (ret) return ret; diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c94203ce65bb323b033a9c635935e88c2f14878b..8fd63100ba8f0819aac78a7917714709ca6aee28 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -344,7 +344,8 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *addr; u32 intsize; - int i, res; + int i, res, addr_len; + __be32 addr_buf[3] = { 0 }; pr_debug("of_irq_parse_one: dev=%pOF, index=%d\n", device, index); @@ -353,13 +354,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar return of_irq_parse_oldworld(device, index, out_irq); /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ + if (addr_len > (3 * sizeof(__be32))) + addr_len = 3 * sizeof(__be32); + if (addr) + memcpy(addr_buf, addr, addr_len); /* Try the new-style interrupts-extended first */ res = of_parse_phandle_with_args(device, "interrupts-extended", "#interrupt-cells", index, out_irq); if (!res) - return of_irq_parse_raw(addr, out_irq); + return of_irq_parse_raw(addr_buf, out_irq); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -389,7 +396,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Check if there are any interrupt-map translations to process */ - res = of_irq_parse_raw(addr, out_irq); + res = of_irq_parse_raw(addr_buf, out_irq); out: of_node_put(p); return res; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 061f01f60db4b9978284a77e6503a043972cc712..736ad8baa2a555c02e9315b6011ecc85a5221738 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -485,7 +485,9 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot, struct pci_dev *pdev = ctrl_dev(ctrl); pci_config_pm_runtime_get(pdev); - pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC, status), + + /* Attention and Power Indicator Control bits are supported */ + pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC, status), PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC); pci_config_pm_runtime_put(pdev); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e3a49f66982d52fc8a30f3dbf387a2ea95be1deb..ffaaca0978cbc95fbb7566e9700b375cdb4940cc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4477,12 +4477,6 @@ void pci_intx(struct pci_dev *pdev, int enable) { u16 pci_command, new; - /* Preserve the "hybrid" behavior for backwards compatibility */ - if (pci_is_managed(pdev)) { - WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); - return; - } - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); if (enable) @@ -4490,8 +4484,15 @@ void pci_intx(struct pci_dev *pdev, int enable) else new = pci_command | PCI_COMMAND_INTX_DISABLE; - if (new != pci_command) + if (new != pci_command) { + /* Preserve the "hybrid" behavior for backwards compatibility */ + if (pci_is_managed(pdev)) { + WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); + return; + } + pci_write_config_word(pdev, PCI_COMMAND, new); + } } EXPORT_SYMBOL_GPL(pci_intx); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 27b42ca509bd22e7368a815a976669f114827178..31a17a56eb3bba1b2d70e6443f37197ef3d78554 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #define ALT_SBI_PMU_OVERFLOW(__ovl) \ asm volatile(ALTERNATIVE_2( \ @@ -33,7 +35,8 @@ asm volatile(ALTERNATIVE_2( \ THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ CONFIG_ERRATA_THEAD_PMU, \ "csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF), \ - 0, RISCV_ISA_EXT_XANDESPMU, \ + ANDES_VENDOR_ID, \ + RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \ CONFIG_ANDES_CUSTOM_PMU) \ : "=r" (__ovl) : \ : "memory") @@ -42,7 +45,8 @@ asm volatile(ALTERNATIVE_2( \ asm volatile(ALTERNATIVE( \ "csrc " __stringify(CSR_IP) ", %0\n\t", \ "csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t", \ - 0, RISCV_ISA_EXT_XANDESPMU, \ + ANDES_VENDOR_ID, \ + RISCV_ISA_VENDOR_EXT_XANDESPMU + RISCV_VENDOR_EXT_ALTERNATIVES_BASE, \ CONFIG_ANDES_CUSTOM_PMU) \ : : "r"(__irq_mask) \ : "memory") @@ -412,7 +416,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) * but not in the user access mode as we want to use the other counters * that support sampling/filtering. */ - if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { + if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) { if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; cmask = 1; @@ -1095,7 +1099,8 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde riscv_cached_mimpid(0) == 0) { riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; riscv_pmu_use_irq = true; - } else if (riscv_isa_extension_available(NULL, XANDESPMU) && + } else if (riscv_has_vendor_extension_unlikely(ANDES_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XANDESPMU) && IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) { riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI; riscv_pmu_use_irq = true; diff --git a/drivers/pinctrl/nxp/pinctrl-s32cc.c b/drivers/pinctrl/nxp/pinctrl-s32cc.c index df3e5d82da4b3de934bb2abe26aba208ba2ef452..f2609a35c312c87af69752630debbbec65d4ad4d 100644 --- a/drivers/pinctrl/nxp/pinctrl-s32cc.c +++ b/drivers/pinctrl/nxp/pinctrl-s32cc.c @@ -730,9 +730,7 @@ static int s32_pinctrl_parse_groups(struct device_node *np, struct s32_pin_group *grp, struct s32_pinctrl_soc_info *info) { - const __be32 *p; struct device *dev; - struct property *prop; unsigned int *pins, *sss; int i, npins; u32 pinmux; @@ -763,7 +761,7 @@ static int s32_pinctrl_parse_groups(struct device_node *np, return -ENOMEM; i = 0; - of_property_for_each_u32(np, "pinmux", prop, p, pinmux) { + of_property_for_each_u32(np, "pinmux", pinmux) { pins[i] = get_pin_no(pinmux); sss[i] = get_pin_func(pinmux); diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c index 2753e14c3e38a95ffd7a1edf8e957162dbad5458..a898e40451fe4baceedbae2a0e7f498e29d9a379 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -763,8 +763,6 @@ static int k210_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, unsigned int *reserved_maps, unsigned int *num_maps) { - struct property *prop; - const __be32 *p; int ret, pinmux_groups; u32 pinmux_group; unsigned long *configs = NULL; @@ -797,7 +795,7 @@ static int k210_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, if (ret < 0) goto exit; - of_property_for_each_u32(np, "pinmux", prop, p, pinmux_group) { + of_property_for_each_u32(np, "pinmux", pinmux_group) { const char *group_name, *func_name; u32 pin = FIELD_GET(K210_PG_PIN, pinmux_group); u32 func = FIELD_GET(K210_PG_FUNC, pinmux_group); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index f776fd42244f4cb3489e95456cf3e82f24c7badf..73f75958e15c10f07f7495c2aa43f7f33ec53caa 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -813,9 +813,11 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, if (ret == -ENOPROTOOPT) { dev_dbg(ec_dev->dev, "GET_NEXT_EVENT returned invalid version error.\n"); + mutex_lock(&ec_dev->lock); ret = cros_ec_get_host_command_version_mask(ec_dev, EC_CMD_GET_NEXT_EVENT, &ver_mask); + mutex_unlock(&ec_dev->lock); if (ret < 0 || ver_mask == 0) /* * Do not change the MKBP supported version if we can't diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index cb0d4d686d8a5123e2f0511a03af343e5854bd74..a111eca8ff57d2b291d1d2388ef32e0d34a402e5 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -16,35 +16,65 @@ config TURRIS_OMNIA_MCU tristate "Turris Omnia MCU driver" depends on MACH_ARMADA_38X || COMPILE_TEST depends on I2C - depends on OF - depends on WATCHDOG - depends on GPIOLIB - depends on HW_RANDOM - depends on RTC_CLASS - depends on WATCHDOG_CORE - select GPIOLIB_IRQCHIP help Say Y here to add support for the features implemented by the microcontroller on the CZ.NIC's Turris Omnia SOHO router. - The features include: - - board poweroff into true low power mode (with voltage regulators - disabled) and the ability to configure wake up from this mode (via - rtcwake) - - true random number generator (if available on the MCU) - - MCU watchdog - - GPIO pins - - to get front button press events (the front button can be - configured either to generate press events to the CPU or to change - front LEDs panel brightness) - - to enable / disable USB port voltage regulators and to detect - USB overcurrent - - to detect MiniPCIe / mSATA card presence in MiniPCIe port 0 - - to configure resets of various peripherals on board revisions 32+ - - to enable / disable the VHV voltage regulator to the SOC in order - to be able to program SOC's OTP on board revisions 32+ - - to get input from the LED output pins of the WAN ethernet PHY, LAN - switch and MiniPCIe ports + This option only enables the core part of the driver. Specific + features can be enabled by subsequent config options. To compile this driver as a module, choose M here; the module will be called turris-omnia-mcu. +if TURRIS_OMNIA_MCU + +config TURRIS_OMNIA_MCU_GPIO + bool "Turris Omnia MCU GPIOs" + default y + depends on GPIOLIB + depends on OF + select GPIOLIB_IRQCHIP + help + Say Y here to add support for controlling MCU GPIO pins and receiving + MCU interrupts on CZ.NIC's Turris Omnia. + This enables you to + - get front button press events (the front button can be configured + either to generate press events to the CPU or to change front LEDs + panel brightness), + - enable / disable USB port voltage regulators and to detect USB + overcurrent, + - detect MiniPCIe / mSATA card presence in MiniPCIe port 0, + - configure resets of various peripherals on board revisions 32+, + - enable / disable the VHV voltage regulator to the SOC in order to be + able to program SOC's OTP on board revisions 32+, + - get input from the LED output pins of the WAN ethernet PHY, LAN + switch and MiniPCIe ports. + +config TURRIS_OMNIA_MCU_SYSOFF_WAKEUP + bool "Turris Omnia MCU system off and RTC wakeup" + default y + depends on RTC_CLASS + help + Say Y here to add support for CZ.NIC's Turris Omnia board poweroff + into true low power mode (with voltage regulators disabled) and the + ability to configure wake up from this mode (via rtcwake). + +config TURRIS_OMNIA_MCU_WATCHDOG + bool "Turris Omnia MCU watchdog" + default y + depends on WATCHDOG + select WATCHDOG_CORE + help + Say Y here to add support for watchdog provided by CZ.NIC's Turris + Omnia MCU. + +config TURRIS_OMNIA_MCU_TRNG + bool "Turris Omnia MCU true random number generator" + default y + depends on TURRIS_OMNIA_MCU_GPIO + depends on HW_RANDOM + help + Say Y here to add support for the true random number generator + provided by CZ.NIC's Turris Omnia MCU. + +endif # TURRIS_OMNIA_MCU + endif # CZNIC_PLATFORMS diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index eae4c6b341fff776fd11759625a9eda4f28f42bd..ce6d997f34d66e943d44c1993cce63142e00ffbf 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TURRIS_OMNIA_MCU) += turris-omnia-mcu.o turris-omnia-mcu-y := turris-omnia-mcu-base.o -turris-omnia-mcu-y += turris-omnia-mcu-gpio.o -turris-omnia-mcu-y += turris-omnia-mcu-sys-off-wakeup.o -turris-omnia-mcu-y += turris-omnia-mcu-trng.o -turris-omnia-mcu-y += turris-omnia-mcu-watchdog.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_GPIO) += turris-omnia-mcu-gpio.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP) += turris-omnia-mcu-sys-off-wakeup.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_TRNG) += turris-omnia-mcu-trng.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu-base.c b/drivers/platform/cznic/turris-omnia-mcu-base.c index c68a7a84a9519a65005b23292bb32848acc50342..58f9afae28673753eed1ad5aebb51529ab8849e6 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-base.c +++ b/drivers/platform/cznic/turris-omnia-mcu-base.c @@ -197,8 +197,12 @@ static const struct attribute_group omnia_mcu_base_group = { static const struct attribute_group *omnia_mcu_groups[] = { &omnia_mcu_base_group, +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO &omnia_mcu_gpio_group, +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP &omnia_mcu_poweroff_group, +#endif NULL }; diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 2ca56ae13aa908c7fcf3782a336911f0716050cb..fed0d357fea39c0adb8eb1ee440ffaaae984a976 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -33,6 +33,7 @@ struct omnia_mcu { u8 board_first_mac[ETH_ALEN]; u8 board_revision; +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO /* GPIO chip */ struct gpio_chip gc; struct mutex lock; @@ -41,18 +42,25 @@ struct omnia_mcu { struct delayed_work button_release_emul_work; unsigned long last_status; bool button_pressed_emul; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP /* RTC device for configuring wake-up */ struct rtc_device *rtcdev; u32 rtc_alarm; bool front_button_poweron; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG /* MCU watchdog */ struct watchdog_device wdt; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG /* true random number generator */ struct hwrng trng; struct completion trng_entropy_ready; +#endif }; int omnia_cmd_write_read(const struct i2c_client *client, @@ -182,13 +190,43 @@ static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); } +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO extern const u8 omnia_int_to_gpio_idx[32]; extern const struct attribute_group omnia_mcu_gpio_group; -extern const struct attribute_group omnia_mcu_poweroff_group; - int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP +extern const struct attribute_group omnia_mcu_poweroff_group; int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG int omnia_mcu_register_trng(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_trng(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG int omnia_mcu_register_watchdog(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_watchdog(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #endif /* __TURRIS_OMNIA_MCU_H */ diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 7fc602e01487d42542338f4c767cac28b34fcd31..7e89f547999b2a00b0d406c005df83b3c2fcc1d9 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1354,7 +1354,8 @@ void ssam_controller_destroy(struct ssam_controller *ctrl) if (ctrl->state == SSAM_CONTROLLER_UNINITIALIZED) return; - WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED); + WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED && + ctrl->state != SSAM_CONTROLLER_INITIALIZED); /* * Note: New events could still have been received after the previous diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 1c4d74db08c9546f09e4824b904159eddc86319b..a23dff35f8ca23fc7788d133d1a4f9182cffdaf5 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -265,16 +265,34 @@ static const struct software_node *ssam_node_group_sl5[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, - &ssam_node_tmp_perf_profile, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + +/* Devices for Surface Laptop 6. */ +static const struct software_node *ssam_node_group_sl6[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, &ssam_node_hid_main_keyboard, &ssam_node_hid_main_touchpad, &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_sensors, &ssam_node_hid_sam_ucm_ucsi, NULL, }; -/* Devices for Surface Laptop Studio. */ -static const struct software_node *ssam_node_group_sls[] = { +/* Devices for Surface Laptop Studio 1. */ +static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, @@ -289,6 +307,22 @@ static const struct software_node *ssam_node_group_sls[] = { NULL, }; +/* Devices for Surface Laptop Studio 2. */ +static const struct software_node *ssam_node_group_sls2[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Go. */ static const struct software_node *ssam_node_group_slg1[] = { &ssam_node_root, @@ -324,7 +358,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 */ +/* Devices for Surface Pro 9 and 10 */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -365,6 +399,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 9 */ { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 10 */ + { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, @@ -389,14 +426,23 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 5 */ { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop 6 */ + { "MSHW0530", (unsigned long)ssam_node_group_sl6 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, /* Surface Laptop Go 2 */ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, + /* Surface Laptop Go 3 */ + { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, + + /* Surface Laptop Studio 1 */ + { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, + + /* Surface Laptop Studio 2 */ + { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, { }, }; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 665fa9524986595b02826b3645a2f4172b5c048e..ddfccc226751f4dcea9d9b3fee50a16d88c6cdd9 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -477,6 +477,7 @@ config LENOVO_YMC tristate "Lenovo Yoga Tablet Mode Control" depends on ACPI_WMI depends on INPUT + depends on IDEAPAD_LAPTOP select INPUT_SPARSEKMAP help This driver maps the Tablet Mode Control switch to SW_TABLET_MODE input diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index a3d881f6e5d904dd4104ff8582b8a42c03188e4a..c3e51f0a5c33e2160de5f5aabc037c7863a9b30a 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -764,6 +764,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return MSG_OS_HINT_RN; } return -EINVAL; @@ -967,6 +968,7 @@ static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RV) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_SP) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h index 9e32d3128c3a2bd2e6920f3ad60f1fc3d48dd9bf..f1166d15c856203bdb4bf7a7cd634dd68dcf41bd 100644 --- a/drivers/platform/x86/amd/pmc/pmc.h +++ b/drivers/platform/x86/amd/pmc/pmc.h @@ -67,6 +67,7 @@ void amd_mp2_stb_deinit(struct amd_pmc_dev *dev); #define AMD_CPU_ID_PS 0x14E8 #define AMD_CPU_ID_SP 0x14A4 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PCI_DEVICE_ID_AMD_MP2_STB 0x172c #endif /* PMC_H */ diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 2d6e2558863c583237dbd6219d515c15b60c293d..8f1f719befa3e7105bb58c8edf77febaca9b545d 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -41,6 +41,7 @@ #define AMD_CPU_ID_RMB 0x14b5 #define AMD_CPU_ID_PS 0x14e8 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PMF_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 @@ -249,6 +250,7 @@ static const struct pci_device_id pmf_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RMB) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; @@ -382,6 +384,7 @@ static const struct acpi_device_id amd_pmf_acpi_ids[] = { {"AMDI0102", 0}, {"AMDI0103", 0}, {"AMDI0105", 0}, + {"AMDI0107", 0}, { } }; MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids); diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c index 0b2eb0ae85febdfcc62fbffddf231889f7842e22..460444cda1b295211eb419b1f9ba65d9900ec984 100644 --- a/drivers/platform/x86/amd/pmf/pmf-quirks.c +++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c @@ -29,6 +29,14 @@ static const struct dmi_system_id fwbug_list[] = { }, .driver_data = &quirk_no_sps_bug, }, + { + .ident = "ROG Ally X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "RC72LA"), + }, + .driver_data = &quirk_no_sps_bug, + }, {} }; @@ -48,4 +56,3 @@ void amd_pmf_quirks_init(struct amd_pmf_dev *dev) dmi_id->ident); } } - diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index a3dec14c30043ecc9c1d109247452d6d19949976..3c153fb1425e9f6c7fb5039919e40e422f2dbba4 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -150,36 +150,26 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ return 0; } -static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) +static void amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) { struct amd_sfh_info sfh_info; - int ret; + + /* Get the latest information from SFH */ + in->ev_info.user_present = false; /* Get ALS data */ - ret = amd_get_sfh_info(&sfh_info, MT_ALS); - if (!ret) + if (!amd_get_sfh_info(&sfh_info, MT_ALS)) in->ev_info.ambient_light = sfh_info.ambient_light; else - return ret; + dev_dbg(dev->dev, "ALS is not enabled/detected\n"); /* get HPD data */ - ret = amd_get_sfh_info(&sfh_info, MT_HPD); - if (ret) - return ret; - - switch (sfh_info.user_present) { - case SFH_NOT_DETECTED: - in->ev_info.user_present = 0xff; /* assume no sensors connected */ - break; - case SFH_USER_PRESENT: - in->ev_info.user_present = 1; - break; - case SFH_USER_AWAY: - in->ev_info.user_present = 0; - break; + if (!amd_get_sfh_info(&sfh_info, MT_HPD)) { + if (sfh_info.user_present == SFH_USER_PRESENT) + in->ev_info.user_present = true; + } else { + dev_dbg(dev->dev, "HPD is not enabled/detected\n"); } - - return 0; } void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cc735931f97b93be2329e2039d22994456a16584..37636e5a38e3b52f8c46bcb0344693fb1a7c0708 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -146,6 +146,20 @@ static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); +static const struct dmi_system_id asus_ally_mcu_quirk[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC72L"), + }, + }, + { }, +}; + static bool ashs_present(void) { int i = 0; @@ -4685,7 +4699,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); + && dmi_check_system(asus_ally_mcu_quirk); if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 1ace711f7442e2860097d20408adc134ecb6c644..98ec30fce9fdd3c20d56d825e048fd65bfd0b143 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -126,6 +126,7 @@ struct ideapad_rfk_priv { struct ideapad_private { struct acpi_device *adev; + struct mutex vpc_mutex; /* protects the VPC calls */ struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; struct ideapad_rfk_priv rfk_priv[IDEAPAD_RFKILL_DEV_NUM]; struct platform_device *platform_device; @@ -146,6 +147,7 @@ struct ideapad_private { bool touchpad_ctrl_via_ec : 1; bool ctrl_ps2_aux_port : 1; bool usb_charging : 1; + bool ymc_ec_trigger : 1; } features; struct { bool initialized; @@ -194,6 +196,12 @@ MODULE_PARM_DESC(touchpad_ctrl_via_ec, "Enable registering a 'touchpad' sysfs-attribute which can be used to manually " "tell the EC to enable/disable the touchpad. This may not work on all models."); +static bool ymc_ec_trigger __read_mostly; +module_param(ymc_ec_trigger, bool, 0444); +MODULE_PARM_DESC(ymc_ec_trigger, + "Enable EC triggering work-around to force emitting tablet mode events. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + /* * shared data */ @@ -294,6 +302,8 @@ static int debugfs_status_show(struct seq_file *s, void *data) struct ideapad_private *priv = s->private; unsigned long value; + guard(mutex)(&priv->vpc_mutex); + if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL_MAX, &value)) seq_printf(s, "Backlight max: %lu\n", value); if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL, &value)) @@ -412,7 +422,8 @@ static ssize_t camera_power_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); if (err) return err; @@ -431,7 +442,8 @@ static ssize_t camera_power_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); if (err) return err; @@ -484,7 +496,8 @@ static ssize_t fan_mode_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); if (err) return err; @@ -506,7 +519,8 @@ static ssize_t fan_mode_store(struct device *dev, if (state > 4 || state == 3) return -EINVAL; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); if (err) return err; @@ -591,7 +605,8 @@ static ssize_t touchpad_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); if (err) return err; @@ -612,7 +627,8 @@ static ssize_t touchpad_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); if (err) return err; @@ -1005,6 +1021,8 @@ static int ideapad_rfk_set(void *data, bool blocked) struct ideapad_rfk_priv *priv = data; int opcode = ideapad_rfk_data[priv->dev].opcode; + guard(mutex)(&priv->priv->vpc_mutex); + return write_ec_cmd(priv->priv->adev->handle, opcode, !blocked); } @@ -1018,6 +1036,8 @@ static void ideapad_sync_rfk_state(struct ideapad_private *priv) int i; if (priv->features.hw_rfkill_switch) { + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_RF, &hw_blocked)) return; hw_blocked = !hw_blocked; @@ -1191,8 +1211,9 @@ static void ideapad_input_novokey(struct ideapad_private *priv) { unsigned long long_pressed; - if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) + return; if (long_pressed) ideapad_input_report(priv, 17); @@ -1204,8 +1225,9 @@ static void ideapad_check_special_buttons(struct ideapad_private *priv) { unsigned long bit, value; - if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) + return; for_each_set_bit (bit, &value, 16) { switch (bit) { @@ -1238,6 +1260,8 @@ static int ideapad_backlight_get_brightness(struct backlight_device *blightdev) unsigned long now; int err; + guard(mutex)(&priv->vpc_mutex); + err = read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); if (err) return err; @@ -1250,6 +1274,8 @@ static int ideapad_backlight_update_status(struct backlight_device *blightdev) struct ideapad_private *priv = bl_get_data(blightdev); int err; + guard(mutex)(&priv->vpc_mutex); + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_BL, blightdev->props.brightness); if (err) @@ -1327,6 +1353,8 @@ static void ideapad_backlight_notify_power(struct ideapad_private *priv) if (!blightdev) return; + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_BL_POWER, &power)) return; @@ -1339,7 +1367,8 @@ static void ideapad_backlight_notify_brightness(struct ideapad_private *priv) /* if we control brightness via acpi video driver */ if (!priv->blightdev) - read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); + scoped_guard(mutex, &priv->vpc_mutex) + read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); else backlight_force_update(priv->blightdev, BACKLIGHT_UPDATE_HOTKEY); } @@ -1564,7 +1593,8 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ int ret; /* Without reading from EC touchpad LED doesn't switch state */ - ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); + scoped_guard(mutex, &priv->vpc_mutex) + ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); if (ret) return; @@ -1592,16 +1622,92 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ priv->r_touchpad_val = value; } +static const struct dmi_system_id ymc_ec_trigger_quirk_dmi_table[] = { + { + /* Lenovo Yoga 7 14ARB7 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), + }, + }, + { + /* Lenovo Yoga 7 14ACN6 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), + }, + }, + { } +}; + +static void ideapad_laptop_trigger_ec(void) +{ + struct ideapad_private *priv; + int ret; + + guard(mutex)(&ideapad_shared_mutex); + + priv = ideapad_shared; + if (!priv) + return; + + if (!priv->features.ymc_ec_trigger) + return; + + scoped_guard(mutex, &priv->vpc_mutex) + ret = write_ec_cmd(priv->adev->handle, VPCCMD_W_YMC, 1); + if (ret) + dev_warn(&priv->platform_device->dev, "Could not write YMC: %d\n", ret); +} + +static int ideapad_laptop_nb_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + switch (action) { + case IDEAPAD_LAPTOP_YMC_EVENT: + ideapad_laptop_trigger_ec(); + break; + } + + return 0; +} + +static struct notifier_block ideapad_laptop_notifier = { + .notifier_call = ideapad_laptop_nb_notify, +}; + +static BLOCKING_NOTIFIER_HEAD(ideapad_laptop_chain_head); + +int ideapad_laptop_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_register_notifier, IDEAPAD_LAPTOP); + +int ideapad_laptop_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_unregister_notifier, IDEAPAD_LAPTOP); + +void ideapad_laptop_call_notifier(unsigned long action, void *data) +{ + blocking_notifier_call_chain(&ideapad_laptop_chain_head, action, data); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_call_notifier, IDEAPAD_LAPTOP); + static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) { struct ideapad_private *priv = data; unsigned long vpc1, vpc2, bit; - if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) - return; + scoped_guard(mutex, &priv->vpc_mutex) { + if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) + return; - if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) - return; + if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) + return; + } vpc1 = (vpc2 << 8) | vpc1; @@ -1728,6 +1834,8 @@ static void ideapad_check_features(struct ideapad_private *priv) priv->features.ctrl_ps2_aux_port = ctrl_ps2_aux_port || dmi_check_system(ctrl_ps2_aux_port_list); priv->features.touchpad_ctrl_via_ec = touchpad_ctrl_via_ec; + priv->features.ymc_ec_trigger = + ymc_ec_trigger || dmi_check_system(ymc_ec_trigger_quirk_dmi_table); if (!read_ec_data(handle, VPCCMD_R_FAN, &val)) priv->features.fan_mode = true; @@ -1906,6 +2014,10 @@ static int ideapad_acpi_add(struct platform_device *pdev) priv->adev = adev; priv->platform_device = pdev; + err = devm_mutex_init(&pdev->dev, &priv->vpc_mutex); + if (err) + return err; + ideapad_check_features(priv); err = ideapad_sysfs_init(priv); @@ -1974,6 +2086,8 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (err) goto shared_init_failed; + ideapad_laptop_register_notifier(&ideapad_laptop_notifier); + return 0; shared_init_failed: @@ -2006,6 +2120,8 @@ static void ideapad_acpi_remove(struct platform_device *pdev) struct ideapad_private *priv = dev_get_drvdata(&pdev->dev); int i; + ideapad_laptop_unregister_notifier(&ideapad_laptop_notifier); + ideapad_shared_exit(priv); acpi_remove_notify_handler(priv->adev->handle, diff --git a/drivers/platform/x86/ideapad-laptop.h b/drivers/platform/x86/ideapad-laptop.h index 4498a96de59769e7ed9db5e021b3ed856a121a62..948cc61800a950fff5298af64d69c2352b132b3a 100644 --- a/drivers/platform/x86/ideapad-laptop.h +++ b/drivers/platform/x86/ideapad-laptop.h @@ -12,6 +12,15 @@ #include #include #include +#include + +enum ideapad_laptop_notifier_actions { + IDEAPAD_LAPTOP_YMC_EVENT, +}; + +int ideapad_laptop_register_notifier(struct notifier_block *nb); +int ideapad_laptop_unregister_notifier(struct notifier_block *nb); +void ideapad_laptop_call_notifier(unsigned long action, void *data); enum { VPCCMD_R_VPC1 = 0x10, diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 282e4bfe30da326c47156219a5b74790047ab4e3..be3d51ed0e4740eca7c3885b97d591587e747785 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -221,8 +221,8 @@ static int doscan(void *data) */ static void ifs_test_core(int cpu, struct device *dev) { + union ifs_status status = {}; union ifs_scan activate; - union ifs_status status; unsigned long timeout; struct ifs_data *ifsd; int to_start, to_stop; diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 7fa360073f6ef48c2d6607c7f75c022ce9dbdf4e..4045823071091a1af76d3dfc40481dc1b80e4788 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1549,8 +1549,7 @@ int tpmi_sst_dev_add(struct auxiliary_device *auxdev) goto unlock_free; } - ret = sst_main(auxdev, &pd_info[i]); - if (ret) { + if (sst_main(auxdev, &pd_info[i])) { /* * This entry is not valid, hardware can partially * populate dies. In this case MMIO will have 0xFFs. diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 9b7ce03ba085c245ce04c4eb8d2eaafa969b2970..a353e830b65fd1c3f2082aa2950a608895592873 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -7,11 +7,13 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include "../dual_accel_detect.h" @@ -66,6 +68,7 @@ static const struct key_entry intel_vbtn_switchmap[] = { }; struct intel_vbtn_priv { + struct mutex mutex; /* Avoid notify_handler() racing with itself */ struct input_dev *buttons_dev; struct input_dev *switches_dev; bool dual_accel; @@ -155,6 +158,8 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) bool autorelease; int ret; + guard(mutex)(&priv->mutex); + if ((ke = sparse_keymap_entry_from_scancode(priv->buttons_dev, event))) { if (!priv->has_buttons) { dev_warn(&device->dev, "Warning: received 0x%02x button event on a device without buttons, please report this.\n", @@ -290,6 +295,10 @@ static int intel_vbtn_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + err = devm_mutex_init(&device->dev, &priv->mutex); + if (err) + return err; + priv->dual_accel = dual_accel; priv->has_buttons = has_buttons; priv->has_switches = has_switches; diff --git a/drivers/platform/x86/lenovo-ymc.c b/drivers/platform/x86/lenovo-ymc.c index e1fbc35504d4985e0949d051ce1231584a544c42..e0bbd6a14a89cb0b0b968e0003299360e66f311c 100644 --- a/drivers/platform/x86/lenovo-ymc.c +++ b/drivers/platform/x86/lenovo-ymc.c @@ -20,32 +20,10 @@ #define LENOVO_YMC_QUERY_INSTANCE 0 #define LENOVO_YMC_QUERY_METHOD 0x01 -static bool ec_trigger __read_mostly; -module_param(ec_trigger, bool, 0444); -MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events"); - static bool force; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type"); -static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = { - { - /* Lenovo Yoga 7 14ARB7 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), - }, - }, - { - /* Lenovo Yoga 7 14ACN6 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), - }, - }, - { } -}; - static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { { .matches = { @@ -62,21 +40,8 @@ static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { struct lenovo_ymc_private { struct input_dev *input_dev; - struct acpi_device *ec_acpi_dev; }; -static void lenovo_ymc_trigger_ec(struct wmi_device *wdev, struct lenovo_ymc_private *priv) -{ - int err; - - if (!priv->ec_acpi_dev) - return; - - err = write_ec_cmd(priv->ec_acpi_dev->handle, VPCCMD_W_YMC, 1); - if (err) - dev_warn(&wdev->dev, "Could not write YMC: %d\n", err); -} - static const struct key_entry lenovo_ymc_keymap[] = { /* Laptop */ { KE_SW, 0x01, { .sw = { SW_TABLET_MODE, 0 } } }, @@ -125,11 +90,9 @@ static void lenovo_ymc_notify(struct wmi_device *wdev, union acpi_object *data) free_obj: kfree(obj); - lenovo_ymc_trigger_ec(wdev, priv); + ideapad_laptop_call_notifier(IDEAPAD_LAPTOP_YMC_EVENT, &code); } -static void acpi_dev_put_helper(void *p) { acpi_dev_put(p); } - static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) { struct lenovo_ymc_private *priv; @@ -143,29 +106,10 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) return -ENODEV; } - ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table); - priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - if (ec_trigger) { - pr_debug("Lenovo YMC enable EC triggering.\n"); - priv->ec_acpi_dev = acpi_dev_get_first_match_dev("VPC2004", NULL, -1); - - if (!priv->ec_acpi_dev) { - dev_err(&wdev->dev, "Could not find EC ACPI device.\n"); - return -ENODEV; - } - err = devm_add_action_or_reset(&wdev->dev, - acpi_dev_put_helper, priv->ec_acpi_dev); - if (err) { - dev_err(&wdev->dev, - "Could not clean up EC ACPI device: %d\n", err); - return err; - } - } - input_dev = devm_input_allocate_device(&wdev->dev); if (!input_dev) return -ENOMEM; @@ -192,7 +136,6 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) dev_set_drvdata(&wdev->dev, priv); /* Report the state for the first time on probe */ - lenovo_ymc_trigger_ec(wdev, priv); lenovo_ymc_notify(wdev, NULL); return 0; } @@ -217,3 +160,4 @@ module_wmi_driver(lenovo_ymc_driver); MODULE_AUTHOR("Gergo Koteles "); MODULE_DESCRIPTION("Lenovo Yoga Mode Control driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IDEAPAD_LAPTOP); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 3e94fdd1ea52e035588847b6578b43209aa93bc5..3197aaa69da73dbde21be0c3da2d0bd8011cdcf3 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -757,7 +757,6 @@ static union acpi_object *__call_snc_method(acpi_handle handle, char *method, return result; } -#define MIN(a, b) (a > b ? b : a) static int sony_nc_buffer_call(acpi_handle handle, char *name, u64 *value, void *buffer, size_t buflen) { diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index b5903193e2f96d494a6aaccb515977a74b21a4c8..ac05942e4e6ac1f5610a7217857465136a695601 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -178,18 +178,18 @@ static inline int axp288_charger_set_cv(struct axp288_chrg_info *info, int cv) u8 reg_val; int ret; - if (cv <= CV_4100MV) { - reg_val = CHRG_CCCV_CV_4100MV; - cv = CV_4100MV; - } else if (cv <= CV_4150MV) { - reg_val = CHRG_CCCV_CV_4150MV; - cv = CV_4150MV; - } else if (cv <= CV_4200MV) { + if (cv >= CV_4350MV) { + reg_val = CHRG_CCCV_CV_4350MV; + cv = CV_4350MV; + } else if (cv >= CV_4200MV) { reg_val = CHRG_CCCV_CV_4200MV; cv = CV_4200MV; + } else if (cv >= CV_4150MV) { + reg_val = CHRG_CCCV_CV_4150MV; + cv = CV_4150MV; } else { - reg_val = CHRG_CCCV_CV_4350MV; - cv = CV_4350MV; + reg_val = CHRG_CCCV_CV_4100MV; + cv = CV_4100MV; } reg_val = reg_val << CHRG_CCCV_CV_BIT_POS; @@ -337,8 +337,8 @@ static int axp288_charger_usb_set_property(struct power_supply *psy, } break; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: - scaled_val = min(val->intval, info->max_cv); - scaled_val = DIV_ROUND_CLOSEST(scaled_val, 1000); + scaled_val = DIV_ROUND_CLOSEST(val->intval, 1000); + scaled_val = min(scaled_val, info->max_cv); ret = axp288_charger_set_cv(info, scaled_val); if (ret < 0) { dev_warn(&info->pdev->dev, "set charge voltage failed\n"); diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index 46f36dcb185c366aec6652017e1c64f9b0822ae1..49bef4a5ac3f3e805f65d6b505a394d78b4012a0 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -486,7 +486,7 @@ static int qcom_battmgr_bat_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -683,7 +683,7 @@ static int qcom_battmgr_ac_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); if (ret) @@ -748,7 +748,7 @@ static int qcom_battmgr_usb_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -867,7 +867,7 @@ static int qcom_battmgr_wls_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -1007,7 +1007,9 @@ static void qcom_battmgr_sc8280xp_callback(struct qcom_battmgr *battmgr, battmgr->error = 0; break; case BATTMGR_BAT_INFO: - if (payload_len != sizeof(resp->info)) { + /* some firmware versions report an extra __le32 at the end of the payload */ + if (payload_len != sizeof(resp->info) && + payload_len != (sizeof(resp->info) + sizeof(__le32))) { dev_warn(battmgr->dev, "invalid payload length for battery information request: %zd\n", payload_len); diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 32eafe2c00af5177e0db8a31eab0d8d892e0d86c..7a27b262fb84a73db82b12e97c2fb57162709e14 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -159,6 +159,7 @@ static int rt5033_battery_probe(struct i2c_client *client) return -EINVAL; } + i2c_set_clientdata(client, battery); psy_cfg.of_node = client->dev.of_node; psy_cfg.drv_data = battery; diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 7adf4f2b1049a4a51cedab46bf41263b7f8607fd..951b38ff5f8ecb738c466d71a6d399b8f53b5c41 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -510,8 +510,6 @@ static int pwm_samsung_parse_dt(struct pwm_chip *chip) struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); struct device_node *np = pwmchip_parent(chip)->of_node; const struct of_device_id *match; - struct property *prop; - const __be32 *cur; u32 val; match = of_match_node(samsung_pwm_matches, np); @@ -520,7 +518,7 @@ static int pwm_samsung_parse_dt(struct pwm_chip *chip) memcpy(&our_chip->variant, match->data, sizeof(our_chip->variant)); - of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) { + of_property_for_each_u32(np, "samsung,pwm-outputs", val) { if (val >= SAMSUNG_PWM_NUM) { dev_err(pwmchip_parent(chip), "%s: invalid channel index in samsung,pwm-outputs property\n", diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e6a9027773fce8ff47f0587780642b5b5789f289..4b411a09c1a6708e2fd1775ca7b8328cda679b59 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1661,10 +1661,10 @@ config REGULATOR_UNIPHIER config REGULATOR_RZG2L_VBCTRL tristate "Renesas RZ/G2L USB VBUS regulator driver" - depends on ARCH_RZG2L || COMPILE_TEST + depends on RESET_RZG2L_USBPHY_CTRL || COMPILE_TEST depends on OF select REGMAP_MMIO - default ARCH_RZG2L + default RESET_RZG2L_USBPHY_CTRL help Support for VBUS regulators implemented on Renesas RZ/G2L SoCs. diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a97cfedd7060ace133224fe718d401cb4a7fda7..42a4a996defbe1e4e39c803340b98eb05472bd69 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct dasd_block *block, struct irb *irb) if (!sense) return 0; - return !!(sense[1] & SNS1_NO_REC_FOUND) || - !!(sense[1] & SNS1_FILE_PROTECTED) || - scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN; + if (sense[1] & SNS1_NO_REC_FOUND) + return 1; + + if ((sense[1] & SNS1_INV_TRACK_FORMAT) && + scsw_is_tm(&irb->scsw) && + !(sense[2] & SNS2_ENV_DATA_PRESENT)) + return 1; + + return 0; } static int dasd_ese_oos_cond(u8 *sense) @@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, struct dasd_device *device; unsigned long now; int nrf_suppressed = 0; - int fp_suppressed = 0; + int it_suppressed = 0; struct request *req; u8 *sense = NULL; int expires; @@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, */ sense = dasd_get_sense(irb); if (sense) { - fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); + it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) && test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); @@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } } - if (!(fp_suppressed || nrf_suppressed)) + if (!(it_suppressed || nrf_suppressed)) device->discipline->dump_sense_dbf(device, irb, "int"); if (device->features & DASD_FEATURE_ERPLOG) @@ -2459,14 +2466,17 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and error recovery would be - * unnecessary in these cases. Check if the according suppress - * bit is set. + * In some cases certain errors might be expected and + * error recovery would be unnecessary in these cases. + * Check if the according suppress bit is set. */ sense = dasd_get_sense(&cqr->irb); - if (sense && sense[1] & SNS1_FILE_PROTECTED && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags)) + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags)) + continue; + if (sense && (sense[1] & SNS1_NO_REC_FOUND) && + test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags)) continue; if (scsw_cstat(&cqr->irb.scsw) == 0x40 && test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags)) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index bbbacfc386f28d92ae2ab9e2091d8178328d6d55..d0aa267462c50a556f3eb718df9e29edb8fbb72b 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_req * erp) struct dasd_device *device = erp->startdev; - /* - * In some cases the 'File Protected' error might be expected and - * log messages shouldn't be written then. - * Check if the according suppress bit is set. - */ - if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags)) - dev_err(&device->cdev->dev, - "Accessing the DASD failed because of a hardware error\n"); + dev_err(&device->cdev->dev, + "Accessing the DASD failed because of a hardware error\n"); return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9388b5c383cab8e48e4d300fb5e41d0947f0c59d..90b106408992d0c793b02bfed7e5eec4a8d44bf9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2275,6 +2275,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); return cqr; } @@ -2556,7 +2557,6 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata, cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); return cqr; @@ -4130,8 +4130,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); } @@ -4633,9 +4631,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); } return cqr; @@ -5780,36 +5777,32 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, { u8 *sense = dasd_get_sense(irb); - if (scsw_is_tm(&irb->scsw)) { - /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and log messages shouldn't be written - * then. Check if the according suppress bit is set. - */ - if (sense && (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &req->flags)) - return; - if (scsw_cstat(&irb->scsw) == 0x40 && - test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) - return; + /* + * In some cases certain errors might be expected and + * log messages shouldn't be written then. + * Check if the according suppress bit is set. + */ + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &req->flags)) + return; - dasd_eckd_dump_sense_tcw(device, req, irb); - } else { - /* - * In some cases the 'Command Reject' or 'No Record Found' - * error might be expected and log messages shouldn't be - * written then. Check if the according suppress bit is set. - */ - if (sense && sense[0] & SNS0_CMD_REJECT && - test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) - return; + if (sense && sense[0] & SNS0_CMD_REJECT && + test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) + return; - if (sense && sense[1] & SNS1_NO_REC_FOUND && - test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) - return; + if (sense && sense[1] & SNS1_NO_REC_FOUND && + test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) + return; + if (scsw_cstat(&irb->scsw) == 0x40 && + test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) + return; + + if (scsw_is_tm(&irb->scsw)) + dasd_eckd_dump_sense_tcw(device, req, irb); + else dasd_eckd_dump_sense_ccw(device, req, irb); - } } static int dasd_eckd_reload_device(struct dasd_device *device) diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 1aa426b1deddc7ba75c2728c5bfab93af6c71d5e..6da47a65af610f3ed1f83c81fc8511642593b358 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,7 +41,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) */ .max_segment_size = PAGE_SIZE, .seg_boundary_mask = PAGE_SIZE - 1, - .dma_alignment = PAGE_SIZE - 1, .max_segments = USHRT_MAX, }; struct gendisk *gdp; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index e5f40536b4254027c3c181a6efb29e318fa72f7a..81cfb5c89681bc9612acac0f333b8e6d7e4896a8 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -196,7 +196,7 @@ struct dasd_ccw_req { * The following flags are used to suppress output of certain errors. */ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */ -#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/ +#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 8a03af5ee5b30015aa60e658c70e663776dd6478..80c4e5101c9712f2421e6ebec01b91153ebeaa82 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -96,7 +96,7 @@ config SCLP_OFB config S390_UV_UAPI def_tristate m prompt "Ultravisor userspace API" - depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST) + depends on S390 help Selecting exposes parts of the UV interface to userspace by providing a misc character device at /dev/uv. diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index b72f672a7720a61885c3540231fe0da56b00ef7b..66b1bdc6328443eba021a60c1013f75a0a1d7120 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -550,4 +550,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) put_device(&gdev->dev); } EXPORT_SYMBOL(ccwgroup_remove_ccwdev); +MODULE_DESCRIPTION("ccwgroup bus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 8ad49030a7bf3044e75cf43c51c10af9c71ce7bd..914dde0416753f2b3fc4042f9eda8e9b230cac83 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -488,4 +488,5 @@ static void __exit vfio_ccw_sch_exit(void) module_init(vfio_ccw_sch_init); module_exit(vfio_ccw_sch_exit); +MODULE_DESCRIPTION("VFIO based Subchannel device driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index d31884f82f2ad5daa4a0657059377824e609c8b4..73085d2f5c4344681aaa5adef25cb4ef82a211b3 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -65,11 +65,7 @@ #include "task.h" #include "probe_roms.h" -#define MAJ 1 -#define MIN 2 -#define BUILD 0 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ - __stringify(BUILD) +#define DRV_VERSION "1.2.0" MODULE_VERSION(DRV_VERSION); diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8b0eded6ef36e519b3dce897917c4bd92f8f261a..01f035f9330e4bcb4e26641737748130278a0a9c 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -100,7 +100,8 @@ retry_trace: dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + if (get_order(trace_size) > MAX_PAGE_ORDER || + mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -118,8 +119,12 @@ retry_fw: diag_buffer->type = MPI3_DIAG_BUFFER_TYPE_FW; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_NOT_ALLOCATED; if ((mrioc->facts.diag_fw_sz < fw_size) && (fw_size >= fw_min_size)) { - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - fw_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (get_order(fw_size) <= MAX_PAGE_ORDER) { + diag_buffer->addr + = dma_alloc_coherent(&mrioc->pdev->dev, fw_size, + &diag_buffer->dma_addr, + GFP_KERNEL); + } if (!retry) dprint_init(mrioc, "%s:trying to allocate firmware diag buffer of size = %dKB\n", diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 69b14918de59ea02aed9f6ac17d9843423dcc82c..616894571c6abe6758ef3cc648555d852941f058 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3575,6 +3575,17 @@ static int mpi3mr_prepare_sg_scmd(struct mpi3mr_ioc *mrioc, scmd->sc_data_direction); priv->meta_sg_valid = 1; /* To unmap meta sg DMA */ } else { + /* + * Some firmware versions byte-swap the REPORT ZONES command + * reply from ATA-ZAC devices by directly accessing in the host + * buffer. This does not respect the default command DMA + * direction and causes IOMMU page faults on some architectures + * with an IOMMU enforcing write mappings (e.g. AMD hosts). + * Avoid such issue by making the REPORT ZONES buffer mapping + * bi-directional. + */ + if (scmd->cmnd[0] == ZBC_IN && scmd->cmnd[1] == ZI_REPORT_ZONES) + scmd->sc_data_direction = DMA_BIDIRECTIONAL; sg_scmd = scsi_sglist(scmd); sges_left = scsi_dma_map(scmd); } @@ -5223,6 +5234,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&mrioc->watchdog_lock); spin_lock_init(&mrioc->chain_buf_lock); spin_lock_init(&mrioc->sas_node_lock); + spin_lock_init(&mrioc->trigger_lock); INIT_LIST_HEAD(&mrioc->fwevt_list); INIT_LIST_HEAD(&mrioc->tgtdev_list); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b2bcf4a27ddcdec154889a7bb4ef5d255c1aedb2..b785a7e88b498cb3a0e778d7543d91a682633ca0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2671,6 +2671,22 @@ _base_build_zero_len_sge_ieee(struct MPT3SAS_ADAPTER *ioc, void *paddr) _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1); } +static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd) +{ + /* + * Some firmware versions byte-swap the REPORT ZONES command reply from + * ATA-ZAC devices by directly accessing in the host buffer. This does + * not respect the default command DMA direction and causes IOMMU page + * faults on some architectures with an IOMMU enforcing write mappings + * (e.g. AMD hosts). Avoid such issue by making the report zones buffer + * mapping bi-directional. + */ + if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES) + cmd->sc_data_direction = DMA_BIDIRECTIONAL; + + return scsi_dma_map(cmd); +} + /** * _base_build_sg_scmd - main sg creation routine * pcie_device is unused here! @@ -2717,7 +2733,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc, sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; @@ -2861,7 +2877,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, } sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index adeaa8ab995167674909b496d2c4804d73e872c5..699f4f9674d98f67acbbf05f4a5398e8fcac0366 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2711,8 +2711,6 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; - - sd_config_discard(sdkp, lim, SD_LBP_WS16); } sdkp->capacity = lba + 1; @@ -3365,8 +3363,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp, sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); - sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); - config_atomic: sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); @@ -3753,9 +3749,10 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); - sd_read_cpr(sdkp); } + sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); + sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); @@ -3808,6 +3805,14 @@ static int sd_revalidate_disk(struct gendisk *disk) if (err) return err; + /* + * Query concurrent positioning ranges after + * queue_limits_commit_update() unlocked q->limits_lock to avoid + * deadlock with q->sysfs_dir_lock and q->sysfs_lock. + */ + if (sdkp->media_present && scsi_device_supports_vpd(sdp)) + sd_read_cpr(sdkp); + /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk @@ -4205,6 +4210,8 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4221,13 +4228,12 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index a0d2556a27bba32b43cda153bb6adb471c0eef0c..089653018d32c1d92663c777c8abccf475a662d6 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -431,7 +431,7 @@ int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed) struct packet_command cgc; /* avoid exceeding the max speed or overflowing integer bounds */ - speed = clamp(0, speed, 0xffff / 177); + speed = clamp(speed, 0, 0xffff / 177); if (speed == 0) speed = 0xffff; /* set to max */ diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 7e9074519ad22d7d6b26a9148f5f65b4e2e79b4f..4dc8aba33d9b70cbdaab6369370da0ccf484a7b1 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2546,11 +2546,6 @@ release_lock: } EXPORT_SYMBOL(qman_delete_cgr); -struct cgr_comp { - struct qman_cgr *cgr; - struct completion completion; -}; - static void qman_delete_cgr_smp_call(void *p) { qman_delete_cgr((struct qman_cgr *)p); diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 32baa14dfd83e5f93f81637ac5a4ce9fcbbc5456..be261ac09df823ae10cde7f141b6fe98355f6733 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -296,7 +296,7 @@ static void fsl_lpspi_set_watermark(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; - unsigned int perclk_rate, scldiv; + unsigned int perclk_rate, scldiv, div; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); @@ -313,8 +313,10 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) return -EINVAL; } + div = DIV_ROUND_UP(perclk_rate, config.speed_hz); + for (prescale = 0; prescale < 8; prescale++) { - scldiv = perclk_rate / config.speed_hz / (1 << prescale) - 2; + scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; break; diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 77e9738e42f60ec844c74f09dddf5d90e898d87d..16054695bdb04abf03a232f3ffb6ca12ff434eb7 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -481,6 +481,9 @@ static int hisi_spi_probe(struct platform_device *pdev) return -EINVAL; } + if (host->max_speed_hz == 0) + return dev_err_probe(dev, -EINVAL, "spi-max-frequency can't be 0\n"); + ret = device_property_read_u16(dev, "num-cs", &host->num_chipselect); if (ret) @@ -495,6 +498,7 @@ static int hisi_spi_probe(struct platform_device *pdev) host->transfer_one = hisi_spi_transfer_one; host->handle_err = hisi_spi_handle_err; host->dev.fwnode = dev->fwnode; + host->min_speed_hz = DIV_ROUND_UP(host->max_speed_hz, CLK_DIV_MAX); hisi_spi_hw_init(hs); diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c index 6246254e1dff8978424659c16058f41fe4a14924..7c1a9a9853733e231bda97d672b4fa0f082927c9 100644 --- a/drivers/spi/spi-microchip-core.c +++ b/drivers/spi/spi-microchip-core.c @@ -75,6 +75,7 @@ #define REG_CONTROL (0x00) #define REG_FRAME_SIZE (0x04) +#define FRAME_SIZE_MASK GENMASK(5, 0) #define REG_STATUS (0x08) #define REG_INT_CLEAR (0x0c) #define REG_RX_DATA (0x10) @@ -89,6 +90,9 @@ #define REG_RIS (0x24) #define REG_CONTROL2 (0x28) #define REG_COMMAND (0x2c) +#define COMMAND_CLRFRAMECNT BIT(4) +#define COMMAND_TXFIFORST BIT(3) +#define COMMAND_RXFIFORST BIT(2) #define REG_PKTSIZE (0x30) #define REG_CMD_SIZE (0x34) #define REG_HWSTATUS (0x38) @@ -103,10 +107,11 @@ struct mchp_corespi { u8 *rx_buf; u32 clk_gen; /* divider for spi output clock generated by the controller */ u32 clk_mode; + u32 pending_slave_select; int irq; int tx_len; int rx_len; - int pending; + int n_bytes; }; static inline u32 mchp_corespi_read(struct mchp_corespi *spi, unsigned int reg) @@ -130,113 +135,126 @@ static inline void mchp_corespi_disable(struct mchp_corespi *spi) static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi) { - u8 data; - int fifo_max, i = 0; + while (spi->rx_len >= spi->n_bytes && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { + u32 data = mchp_corespi_read(spi, REG_RX_DATA); - fifo_max = min(spi->rx_len, FIFO_DEPTH); + spi->rx_len -= spi->n_bytes; - while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { - data = mchp_corespi_read(spi, REG_RX_DATA); + if (!spi->rx_buf) + continue; - if (spi->rx_buf) - *spi->rx_buf++ = data; - i++; + if (spi->n_bytes == 4) + *((u32 *)spi->rx_buf) = data; + else if (spi->n_bytes == 2) + *((u16 *)spi->rx_buf) = data; + else + *spi->rx_buf = data; + + spi->rx_buf += spi->n_bytes; } - spi->rx_len -= i; - spi->pending -= i; } static void mchp_corespi_enable_ints(struct mchp_corespi *spi) { - u32 control, mask = INT_ENABLE_MASK; - - mchp_corespi_disable(spi); - - control = mchp_corespi_read(spi, REG_CONTROL); - - control |= mask; - mchp_corespi_write(spi, REG_CONTROL, control); + u32 control = mchp_corespi_read(spi, REG_CONTROL); - control |= CONTROL_ENABLE; + control |= INT_ENABLE_MASK; mchp_corespi_write(spi, REG_CONTROL, control); } static void mchp_corespi_disable_ints(struct mchp_corespi *spi) { - u32 control, mask = INT_ENABLE_MASK; - - mchp_corespi_disable(spi); - - control = mchp_corespi_read(spi, REG_CONTROL); - control &= ~mask; - mchp_corespi_write(spi, REG_CONTROL, control); + u32 control = mchp_corespi_read(spi, REG_CONTROL); - control |= CONTROL_ENABLE; + control &= ~INT_ENABLE_MASK; mchp_corespi_write(spi, REG_CONTROL, control); } static inline void mchp_corespi_set_xfer_size(struct mchp_corespi *spi, int len) { u32 control; - u16 lenpart; + u32 lenpart; + u32 frames = mchp_corespi_read(spi, REG_FRAMESUP); /* - * Disable the SPI controller. Writes to transfer length have - * no effect when the controller is enabled. + * Writing to FRAMECNT in REG_CONTROL will reset the frame count, taking + * a shortcut requires an explicit clear. */ - mchp_corespi_disable(spi); + if (frames == len) { + mchp_corespi_write(spi, REG_COMMAND, COMMAND_CLRFRAMECNT); + return; + } /* * The lower 16 bits of the frame count are stored in the control reg * for legacy reasons, but the upper 16 written to a different register: * FRAMESUP. While both the upper and lower bits can be *READ* from the - * FRAMESUP register, writing to the lower 16 bits is a NOP + * FRAMESUP register, writing to the lower 16 bits is (supposedly) a NOP. + * + * The driver used to disable the controller while modifying the frame + * count, and mask off the lower 16 bits of len while writing to + * FRAMES_UP. When the driver was changed to disable the controller as + * infrequently as possible, it was discovered that the logic of + * lenpart = len & 0xffff_0000 + * write(REG_FRAMESUP, lenpart) + * would actually write zeros into the lower 16 bits on an mpfs250t-es, + * despite documentation stating these bits were read-only. + * Writing len unmasked into FRAMES_UP ensures those bits aren't zeroed + * on an mpfs250t-es and will be a NOP for the lower 16 bits on hardware + * that matches the documentation. */ lenpart = len & 0xffff; - control = mchp_corespi_read(spi, REG_CONTROL); control &= ~CONTROL_FRAMECNT_MASK; control |= lenpart << CONTROL_FRAMECNT_SHIFT; mchp_corespi_write(spi, REG_CONTROL, control); - - lenpart = len & 0xffff0000; - mchp_corespi_write(spi, REG_FRAMESUP, lenpart); - - control |= CONTROL_ENABLE; - mchp_corespi_write(spi, REG_CONTROL, control); + mchp_corespi_write(spi, REG_FRAMESUP, len); } static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi) { - u8 byte; int fifo_max, i = 0; - fifo_max = min(spi->tx_len, FIFO_DEPTH); + fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); mchp_corespi_set_xfer_size(spi, fifo_max); while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_TXFIFO_FULL)) { - byte = spi->tx_buf ? *spi->tx_buf++ : 0xaa; - mchp_corespi_write(spi, REG_TX_DATA, byte); + u32 word; + + if (spi->n_bytes == 4) + word = spi->tx_buf ? *((u32 *)spi->tx_buf) : 0xaa; + else if (spi->n_bytes == 2) + word = spi->tx_buf ? *((u16 *)spi->tx_buf) : 0xaa; + else + word = spi->tx_buf ? *spi->tx_buf : 0xaa; + + mchp_corespi_write(spi, REG_TX_DATA, word); + if (spi->tx_buf) + spi->tx_buf += spi->n_bytes; i++; } - spi->tx_len -= i; - spi->pending += i; + spi->tx_len -= i * spi->n_bytes; } static inline void mchp_corespi_set_framesize(struct mchp_corespi *spi, int bt) { + u32 frame_size = mchp_corespi_read(spi, REG_FRAME_SIZE); u32 control; + if ((frame_size & FRAME_SIZE_MASK) == bt) + return; + /* * Disable the SPI controller. Writes to the frame size have * no effect when the controller is enabled. */ - mchp_corespi_disable(spi); + control = mchp_corespi_read(spi, REG_CONTROL); + control &= ~CONTROL_ENABLE; + mchp_corespi_write(spi, REG_CONTROL, control); mchp_corespi_write(spi, REG_FRAME_SIZE, bt); - control = mchp_corespi_read(spi, REG_CONTROL); control |= CONTROL_ENABLE; mchp_corespi_write(spi, REG_CONTROL, control); } @@ -249,8 +267,18 @@ static void mchp_corespi_set_cs(struct spi_device *spi, bool disable) reg = mchp_corespi_read(corespi, REG_SLAVE_SELECT); reg &= ~BIT(spi_get_chipselect(spi, 0)); reg |= !disable << spi_get_chipselect(spi, 0); + corespi->pending_slave_select = reg; - mchp_corespi_write(corespi, REG_SLAVE_SELECT, reg); + /* + * Only deassert chip select immediately. Writing to some registers + * requires the controller to be disabled, which results in the + * output pins being tristated and can cause the SCLK and MOSI lines + * to transition. Therefore asserting the chip select is deferred + * until just before writing to the TX FIFO, to ensure the device + * doesn't see any spurious clock transitions whilst CS is enabled. + */ + if (((spi->mode & SPI_CS_HIGH) == 0) == disable) + mchp_corespi_write(corespi, REG_SLAVE_SELECT, reg); } static int mchp_corespi_setup(struct spi_device *spi) @@ -269,6 +297,7 @@ static int mchp_corespi_setup(struct spi_device *spi) if (spi->mode & SPI_CS_HIGH) { reg = mchp_corespi_read(corespi, REG_SLAVE_SELECT); reg |= BIT(spi_get_chipselect(spi, 0)); + corespi->pending_slave_select = reg; mchp_corespi_write(corespi, REG_SLAVE_SELECT, reg); } return 0; @@ -279,17 +308,13 @@ static void mchp_corespi_init(struct spi_controller *host, struct mchp_corespi * unsigned long clk_hz; u32 control = mchp_corespi_read(spi, REG_CONTROL); - control |= CONTROL_MASTER; + control &= ~CONTROL_ENABLE; + mchp_corespi_write(spi, REG_CONTROL, control); + control |= CONTROL_MASTER; control &= ~CONTROL_MODE_MASK; control |= MOTOROLA_MODE; - mchp_corespi_set_framesize(spi, DEFAULT_FRAMESIZE); - - /* max. possible spi clock rate is the apb clock rate */ - clk_hz = clk_get_rate(spi->clk); - host->max_speed_hz = clk_hz; - /* * The controller must be configured so that it doesn't remove Chip * Select until the entire message has been transferred, even if at @@ -298,11 +323,16 @@ static void mchp_corespi_init(struct spi_controller *host, struct mchp_corespi * * BIGFIFO mode is also enabled, which sets the fifo depth to 32 frames * for the 8 bit transfers that this driver uses. */ - control = mchp_corespi_read(spi, REG_CONTROL); control |= CONTROL_SPS | CONTROL_BIGFIFO; mchp_corespi_write(spi, REG_CONTROL, control); + mchp_corespi_set_framesize(spi, DEFAULT_FRAMESIZE); + + /* max. possible spi clock rate is the apb clock rate */ + clk_hz = clk_get_rate(spi->clk); + host->max_speed_hz = clk_hz; + mchp_corespi_enable_ints(spi); /* @@ -310,7 +340,8 @@ static void mchp_corespi_init(struct spi_controller *host, struct mchp_corespi * * select is relinquished to the hardware. SSELOUT is enabled too so we * can deal with active high targets. */ - mchp_corespi_write(spi, REG_SLAVE_SELECT, SSELOUT | SSEL_DIRECT); + spi->pending_slave_select = SSELOUT | SSEL_DIRECT; + mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); control = mchp_corespi_read(spi, REG_CONTROL); @@ -324,8 +355,6 @@ static inline void mchp_corespi_set_clk_gen(struct mchp_corespi *spi) { u32 control; - mchp_corespi_disable(spi); - control = mchp_corespi_read(spi, REG_CONTROL); if (spi->clk_mode) control |= CONTROL_CLKMODE; @@ -334,12 +363,12 @@ static inline void mchp_corespi_set_clk_gen(struct mchp_corespi *spi) mchp_corespi_write(spi, REG_CLK_GEN, spi->clk_gen); mchp_corespi_write(spi, REG_CONTROL, control); - mchp_corespi_write(spi, REG_CONTROL, control | CONTROL_ENABLE); } static inline void mchp_corespi_set_mode(struct mchp_corespi *spi, unsigned int mode) { - u32 control, mode_val; + u32 mode_val; + u32 control = mchp_corespi_read(spi, REG_CONTROL); switch (mode & SPI_MODE_X_MASK) { case SPI_MODE_0: @@ -357,12 +386,13 @@ static inline void mchp_corespi_set_mode(struct mchp_corespi *spi, unsigned int } /* - * Disable the SPI controller. Writes to the frame size have + * Disable the SPI controller. Writes to the frame protocol have * no effect when the controller is enabled. */ - mchp_corespi_disable(spi); - control = mchp_corespi_read(spi, REG_CONTROL); + control &= ~CONTROL_ENABLE; + mchp_corespi_write(spi, REG_CONTROL, control); + control &= ~(SPI_MODE_X_MASK << MODE_X_MASK_SHIFT); control |= mode_val; @@ -383,21 +413,18 @@ static irqreturn_t mchp_corespi_interrupt(int irq, void *dev_id) if (intfield == 0) return IRQ_NONE; - if (intfield & INT_TXDONE) { + if (intfield & INT_TXDONE) mchp_corespi_write(spi, REG_INT_CLEAR, INT_TXDONE); + if (intfield & INT_RXRDY) { + mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); + if (spi->rx_len) mchp_corespi_read_fifo(spi); - - if (spi->tx_len) - mchp_corespi_write_fifo(spi); - - if (!spi->rx_len) - finalise = true; } - if (intfield & INT_RXRDY) - mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); + if (!spi->rx_len && !spi->tx_len) + finalise = true; if (intfield & INT_RX_CHANNEL_OVERFLOW) { mchp_corespi_write(spi, REG_INT_CLEAR, INT_RX_CHANNEL_OVERFLOW); @@ -477,13 +504,17 @@ static int mchp_corespi_transfer_one(struct spi_controller *host, spi->rx_buf = xfer->rx_buf; spi->tx_len = xfer->len; spi->rx_len = xfer->len; - spi->pending = 0; + spi->n_bytes = roundup_pow_of_two(DIV_ROUND_UP(xfer->bits_per_word, BITS_PER_BYTE)); - mchp_corespi_set_xfer_size(spi, (spi->tx_len > FIFO_DEPTH) - ? FIFO_DEPTH : spi->tx_len); + mchp_corespi_set_framesize(spi, xfer->bits_per_word); - if (spi->tx_len) + mchp_corespi_write(spi, REG_COMMAND, COMMAND_RXFIFORST | COMMAND_TXFIFORST); + + mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); + + while (spi->tx_len) mchp_corespi_write_fifo(spi); + return 1; } @@ -493,7 +524,6 @@ static int mchp_corespi_prepare_message(struct spi_controller *host, struct spi_device *spi_dev = msg->spi; struct mchp_corespi *spi = spi_controller_get_devdata(host); - mchp_corespi_set_framesize(spi, DEFAULT_FRAMESIZE); mchp_corespi_set_mode(spi, spi_dev->mode); return 0; @@ -521,7 +551,7 @@ static int mchp_corespi_probe(struct platform_device *pdev) host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; host->use_gpio_descriptors = true; host->setup = mchp_corespi_setup; - host->bits_per_word_mask = SPI_BPW_MASK(8); + host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); host->transfer_one = mchp_corespi_transfer_one; host->prepare_message = mchp_corespi_prepare_message; host->set_cs = mchp_corespi_set_cs; diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 95fb5f1c91c176099005f2dfdc7cdc3425be92a7..5304728c68c20d3a7e62538585993c013744b3b7 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -700,6 +700,7 @@ static const struct class spidev_class = { }; static const struct spi_device_id spidev_spi_ids[] = { + { .name = "bh2228fv" }, { .name = "dh2228fv" }, { .name = "ltc2488" }, { .name = "sx1301" }, @@ -734,6 +735,7 @@ static const struct of_device_id spidev_dt_ids[] = { { .compatible = "lwn,bk4", .data = &spidev_of_check }, { .compatible = "menlo,m53cpld", .data = &spidev_of_check }, { .compatible = "micron,spi-authenta", .data = &spidev_of_check }, + { .compatible = "rohm,bh2228fv", .data = &spidev_of_check }, { .compatible = "rohm,dh2228fv", .data = &spidev_of_check }, { .compatible = "semtech,sx1301", .data = &spidev_of_check }, { .compatible = "silabs,em3581", .data = &spidev_of_check }, diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index f240fcc5a4e14a61a7b9db39b3091527b7a4d07c..9ba9495fcc4bae401fa054f7da244d35f1c120ca 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -398,7 +398,7 @@ static int pmic_arb_fmt_read_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, u8 sid, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -477,7 +477,7 @@ static int pmic_arb_fmt_write_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -1702,7 +1702,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "cnfg"); if (index < 0) { - dev_err(dev, "cnfg reg region missing"); + dev_err(dev, "cnfg reg region missing\n"); return -EINVAL; } @@ -1712,7 +1712,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "intr"); if (index < 0) { - dev_err(dev, "intr reg region missing"); + dev_err(dev, "intr reg region missing\n"); return -EINVAL; } @@ -1737,8 +1737,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, dev_dbg(&pdev->dev, "adding irq domain for bus %d\n", bus_index); - bus->domain = irq_domain_add_tree(dev->of_node, - &pmic_arb_irq_domain_ops, bus); + bus->domain = irq_domain_add_tree(node, &pmic_arb_irq_domain_ops, bus); if (!bus->domain) { dev_err(&pdev->dev, "unable to create irq_domain\n"); return -ENOMEM; diff --git a/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h b/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h index 7349943bba2b469cae3a5a177711f5b307459c03..160c496784b7fc775f99fbdd52784493c3bfce9d 100644 --- a/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h +++ b/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h @@ -22,11 +22,6 @@ /* force a value to a lower even value */ #define EVEN_FLOOR(x) ((x) & ~1) -/* for preprocessor and array sizing use MIN and MAX - otherwise use min and max */ -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) - #define CEIL_DIV(a, b) (((b) != 0) ? ((a) + (b) - 1) / (b) : 0) #define CEIL_MUL(a, b) (CEIL_DIV(a, b) * (b)) #define CEIL_MUL2(a, b) (((a) + (b) - 1) & ~((b) - 1)) diff --git a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h index 961c612880833a807366bed58d35a68f5a871d2f..aad860e54d3a71b7b9a443c152ad4f02bdac949a 100644 --- a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h +++ b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h @@ -27,12 +27,16 @@ #include "ia_css_prbs.h" #include "ia_css_input_port.h" -/* Input modes, these enumerate all supported input modes. - * Note that not all ISP modes support all input modes. +/* + * Input modes, these enumerate all supported input modes. + * This enum is part of the atomisp firmware ABI and must + * NOT be changed! + * Note that not all ISP modes support all input modes. */ enum ia_css_input_mode { IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */ IA_CSS_INPUT_MODE_FIFO, /** data from input-fifo */ + IA_CSS_INPUT_MODE_TPG, /** data from test-pattern generator */ IA_CSS_INPUT_MODE_PRBS, /** data from pseudo-random bit stream */ IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */ IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */ diff --git a/drivers/staging/media/atomisp/pci/sh_css_internal.h b/drivers/staging/media/atomisp/pci/sh_css_internal.h index a2d972ea3fa084dd2070666219c66aff6155bf36..959e7f549641c10f5243a734d69ea89b2a412c8d 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_internal.h +++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h @@ -344,7 +344,14 @@ struct sh_css_sp_input_formatter_set { #define IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT (3) -/* SP configuration information */ +/* + * SP configuration information + * + * This struct is part of the atomisp firmware ABI and is directly copied + * to ISP DRAM by sh_css_store_sp_group_to_ddr() + * + * Do NOT change this struct's layout or remove seemingly unused fields! + */ struct sh_css_sp_config { u8 no_isp_sync; /* Signal host immediately after start */ u8 enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */ @@ -354,6 +361,10 @@ struct sh_css_sp_config { host (true) or when they are passed to the preview/video pipe (false). */ + /* + * Note the fields below are only used on the ISP2400 not on the ISP2401, + * sh_css_store_sp_group_to_ddr() skip copying these when run on the ISP2401. + */ struct { u8 a_changed; u8 b_changed; @@ -363,11 +374,13 @@ struct sh_css_sp_config { } input_formatter; sync_generator_cfg_t sync_gen; + tpg_cfg_t tpg; prbs_cfg_t prbs; input_system_cfg_t input_circuit; u8 input_circuit_cfg_changed; - u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; - u8 enable_isys_event_queue; + u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; + /* These last 2 fields are used on both the ISP2400 and the ISP2401 */ + u8 enable_isys_event_queue; u8 disable_cont_vf; }; diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 4a2e869b9538c170112c3b09dc05ee31ac70bcc1..daed67d19efb818d9ac738df5d25a4c4fbd5eb53 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -13,6 +13,28 @@ #include "thermal_core.h" +static void bang_bang_set_instance_target(struct thermal_instance *instance, + unsigned int target) +{ + if (instance->target != 0 && instance->target != 1 && + instance->target != THERMAL_NO_TARGET) + pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", + instance->target, instance->name); + + /* + * Enable the fan when the trip is crossed on the way up and disable it + * when the trip is crossed on the way down. + */ + instance->target = target; + instance->initialized = true; + + dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + + mutex_lock(&instance->cdev->lock); + __thermal_cdev_update(instance->cdev); + mutex_unlock(&instance->cdev->lock); +} + /** * bang_bang_control - controls devices associated with the given zone * @tz: thermal_zone_device @@ -54,33 +76,60 @@ static void bang_bang_control(struct thermal_zone_device *tz, tz->temperature, trip->hysteresis); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; + if (instance->trip == trip) + bang_bang_set_instance_target(instance, crossed_up); + } +} + +static void bang_bang_manage(struct thermal_zone_device *tz) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; - if (instance->target != 0 && instance->target != 1 && - instance->target != THERMAL_NO_TARGET) - pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", - instance->target, instance->name); + /* If the code below has run already, nothing needs to be done. */ + if (tz->governor_data) + return; - /* - * Enable the fan when the trip is crossed on the way up and - * disable it when the trip is crossed on the way down. - */ - instance->target = crossed_up; + for_each_trip_desc(tz, td) { + const struct thermal_trip *trip = &td->trip; - dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + if (tz->temperature >= td->threshold || + trip->temperature == THERMAL_TEMP_INVALID || + trip->type == THERMAL_TRIP_CRITICAL || + trip->type == THERMAL_TRIP_HOT) + continue; - mutex_lock(&instance->cdev->lock); - instance->cdev->updated = false; /* cdev needs update */ - mutex_unlock(&instance->cdev->lock); + /* + * If the initial cooling device state is "on", but the zone + * temperature is not above the trip point, the core will not + * call bang_bang_control() until the zone temperature reaches + * the trip point temperature which may be never. In those + * cases, set the initial state of the cooling device to 0. + */ + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (!instance->initialized && instance->trip == trip) + bang_bang_set_instance_target(instance, 0); + } } - list_for_each_entry(instance, &tz->thermal_instances, tz_node) - thermal_cdev_update(instance->cdev); + tz->governor_data = (void *)true; +} + +static void bang_bang_update_tz(struct thermal_zone_device *tz, + enum thermal_notify_event reason) +{ + /* + * Let bang_bang_manage() know that it needs to walk trips after binding + * a new cdev and after system resume. + */ + if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME) + tz->governor_data = NULL; } static struct thermal_governor thermal_gov_bang_bang = { .name = "bang_bang", .trip_crossed = bang_bang_control, + .manage = bang_bang_manage, + .update_tz = bang_bang_update_tz, }; THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 114136893a59af51c61465881c50d70423dea54f..006614921870288f607e9c067dfa70f28b41af28 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -278,20 +278,32 @@ static struct thermal_zone_params tzone_params = { static bool msi_irq; +static void proc_thermal_free_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) +{ + int i; + + for (i = 0; i < MSI_THERMAL_MAX; i++) { + if (proc_thermal_msi_map[i]) + devm_free_irq(&pdev->dev, proc_thermal_msi_map[i], pci_info); + } + + pci_free_irq_vectors(pdev); +} + static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) { - int ret, i, irq; + int ret, i, irq, count; - ret = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); - if (ret < 0) { + count = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (count < 0) { dev_err(&pdev->dev, "Failed to allocate vectors!\n"); - return ret; + return count; } dev_info(&pdev->dev, "msi enabled:%d msix enabled:%d\n", pdev->msi_enabled, pdev->msix_enabled); - for (i = 0; i < MSI_THERMAL_MAX; i++) { + for (i = 0; i < count; i++) { irq = pci_irq_vector(pdev, i); ret = devm_request_threaded_irq(&pdev->dev, irq, proc_thermal_irq_handler, @@ -310,7 +322,7 @@ static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci return 0; err_free_msi_vectors: - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); return ret; } @@ -397,7 +409,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_ err_free_vectors: if (msi_irq) - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); err_ret_tzone: thermal_zone_device_unregister(pci_info->tzone); err_del_legacy: @@ -419,6 +431,9 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev) proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); + if (msi_irq) + proc_thermal_free_msi(pdev, pci_info); + thermal_zone_device_unregister(pci_info->tzone); proc_thermal_mmio_remove(pdev, pci_info->proc_priv); if (!pci_info->no_legacy) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index f6e700e48aadb8c0f0ab13e449a30823ce815a13..e6669aeda1fff0617a7441a60fd1c12210d9df92 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -272,6 +272,44 @@ static int __init thermal_register_governors(void) return ret; } +static int __thermal_zone_device_set_mode(struct thermal_zone_device *tz, + enum thermal_device_mode mode) +{ + if (tz->ops.change_mode) { + int ret; + + ret = tz->ops.change_mode(tz, mode); + if (ret) + return ret; + } + + tz->mode = mode; + + return 0; +} + +static void thermal_zone_broken_disable(struct thermal_zone_device *tz) +{ + struct thermal_trip_desc *td; + + dev_err(&tz->device, "Unable to get temperature, disabling!\n"); + /* + * This function only runs for enabled thermal zones, so no need to + * check for the current mode. + */ + __thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED); + thermal_notify_tz_disable(tz); + + for_each_trip_desc(tz, td) { + if (td->trip.type == THERMAL_TRIP_CRITICAL && + td->trip.temperature > THERMAL_TEMP_INVALID) { + dev_crit(&tz->device, + "Disabled thermal zone with critical trip point\n"); + return; + } + } +} + /* * Zone update section: main control loop applied to each zone while monitoring * in polling mode. The monitoring is done using a workqueue. @@ -292,6 +330,34 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, cancel_delayed_work(&tz->poll_queue); } +static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) +{ + if (error == -EAGAIN) { + thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY); + return; + } + + /* + * Print the message once to reduce log noise. It will be followed by + * another one if the temperature cannot be determined after multiple + * attempts. + */ + if (tz->recheck_delay_jiffies == THERMAL_RECHECK_DELAY) + dev_info(&tz->device, "Temperature check failed (%d)\n", error); + + thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies); + + tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL); + if (tz->recheck_delay_jiffies > THERMAL_MAX_RECHECK_DELAY) { + thermal_zone_broken_disable(tz); + /* + * Restore the original recheck delay value to allow the thermal + * zone to try to recover when it is reenabled by user space. + */ + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; + } +} + static void monitor_thermal_zone(struct thermal_zone_device *tz) { if (tz->mode != THERMAL_DEVICE_ENABLED) @@ -491,10 +557,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, ret = __thermal_zone_get_temp(tz, &temp); if (ret) { - if (ret != -EAGAIN) - dev_info(&tz->device, "Temperature check failed (%d)\n", ret); - - thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); + thermal_zone_recheck(tz, ret); return; } else if (temp <= THERMAL_TEMP_INVALID) { /* @@ -506,6 +569,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, goto monitor; } + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; + tz->last_temperature = tz->temperature; tz->temperature = temp; @@ -540,7 +605,7 @@ monitor: static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, enum thermal_device_mode mode) { - int ret = 0; + int ret; mutex_lock(&tz->lock); @@ -548,14 +613,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, if (mode == tz->mode) { mutex_unlock(&tz->lock); - return ret; + return 0; } - if (tz->ops.change_mode) - ret = tz->ops.change_mode(tz, mode); + ret = __thermal_zone_device_set_mode(tz, mode); + if (ret) { + mutex_unlock(&tz->lock); - if (!ret) - tz->mode = mode; + return ret; + } __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); @@ -566,7 +632,7 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, else thermal_notify_tz_disable(tz); - return ret; + return 0; } int thermal_zone_device_enable(struct thermal_zone_device *tz) @@ -1445,6 +1511,7 @@ thermal_zone_device_register_with_trips(const char *type, thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay); thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay); + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; /* sys I/F */ /* Add nodes that are always present via .groups */ @@ -1661,7 +1728,8 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_debug_tz_resume(tz); thermal_zone_device_init(tz); - __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + thermal_governor_update_tz(tz, THERMAL_TZ_RESUME); + __thermal_zone_device_update(tz, THERMAL_TZ_RESUME); complete(&tz->resume); tz->resuming = false; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index ba8e6fc807cadbc01f557bcdab20dfb0263978a1..4cf2b7230d04bb6578328661e82b26ec8795fdc4 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -67,6 +67,8 @@ struct thermal_governor { * @polling_delay_jiffies: number of jiffies to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) + * @recheck_delay_jiffies: delay after a failed attempt to determine the zone + * temperature before trying again * @temperature: current temperature. This is only for core code, * drivers should use thermal_zone_get_temp() to get the * current temperature @@ -108,6 +110,7 @@ struct thermal_zone_device { int num_trips; unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; + unsigned long recheck_delay_jiffies; int temperature; int last_temperature; int emul_temperature; @@ -137,10 +140,11 @@ struct thermal_zone_device { #define THERMAL_TEMP_INIT INT_MIN /* - * Default delay after a failing thermal zone temperature check before - * attempting to check it again. + * Default and maximum delay after a failed thermal zone temperature check + * before attempting to check it again (in jiffies). */ -#define THERMAL_RECHECK_DELAY_MS 250 +#define THERMAL_RECHECK_DELAY msecs_to_jiffies(250) +#define THERMAL_MAX_RECHECK_DELAY (120 * HZ) /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index c0b679b846b3b90fd10738fd3c84226bb31f1f5a..06a0554ddc389156ccd51bbbc988bebaa47c0046 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -88,10 +88,10 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) return; for_each_trip_desc(tz, td) { - if (td->threshold < tz->temperature && td->threshold > low) + if (td->threshold <= tz->temperature && td->threshold > low) low = td->threshold; - if (td->threshold > tz->temperature && td->threshold < high) + if (td->threshold >= tz->temperature && td->threshold < high) high = td->threshold; } diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 11185cc1db92994c5b2db3b1b21b3ed8848b4347..9ed4bb2e8d05fd2bb779c071612c6c16a45c19a5 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -323,16 +323,17 @@ static ssize_t port_sb_regs_write(struct file *file, const char __user *user_buf if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(port, port_sb_regs, ARRAY_SIZE(port_sb_regs), USB4_SB_TARGET_ROUTER, 0, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&sw->dev); pm_runtime_put_autosuspend(&sw->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } @@ -355,16 +356,17 @@ static ssize_t retimer_sb_regs_write(struct file *file, if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(rt->port, retimer_sb_regs, ARRAY_SIZE(retimer_sb_regs), USB4_SB_TARGET_RETIMER, rt->index, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&rt->dev); pm_runtime_put_autosuspend(&rt->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 326433df5880e26cfef2bc4ce13fdbf37e6b79be..6a2116cbb06f92965028e01b51546903d6a0de56 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *sw) tb_switch_remove(port->remote->sw); port->remote = NULL; } else if (port->xdomain) { + port->xdomain->is_unplugged = true; tb_xdomain_remove(port->xdomain); port->xdomain = NULL; } diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1af9aed99c6512096979498507680a8ac83b7544..afef1dd4ddf49c070efda75be10a1a2696e91413 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "8250.h" @@ -119,12 +118,6 @@ #define UART_OMAP_TO_L 0x26 #define UART_OMAP_TO_H 0x27 -/* - * Copy of the genpd flags for the console. - * Only used if console suspend is disabled - */ -static unsigned int genpd_flags_console; - struct omap8250_priv { void __iomem *membase; int line; @@ -1655,7 +1648,6 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err = 0; serial8250_suspend_port(priv->line); @@ -1666,19 +1658,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - if (uart_console(&up->port)) { - if (console_suspend_enabled) - err = pm_runtime_force_suspend(dev); - else { - /* - * The pd shall not be powered-off (no console suspend). - * Make copy of genpd flags before to set it always on. - * The original value is restored during the resume. - */ - genpd_flags_console = genpd->flags; - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - } - } + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1688,16 +1669,12 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err; if (uart_console(&up->port) && console_suspend_enabled) { - if (console_suspend_enabled) { - err = pm_runtime_force_resume(dev); - if (err) - return err; - } else - genpd->flags = genpd_flags_console; + err = pm_runtime_force_resume(dev); + if (err) + return err; } serial8250_resume_port(priv->line); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 0a90964d6d107f07415c89eb58ee87bf3fb63d71..09b246c9e389ecf791c818b180e216f99df7de26 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2514,7 +2514,7 @@ static const struct uart_ops atmel_pops = { }; static const struct serial_rs485 atmel_rs485_supported = { - .flags = SER_RS485_ENABLED | SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX, + .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 615291ea9b5e93b475e2133fd52ae8f448594b97..77efa7ee6eda29aa9a1d92740430327ffe416eb8 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2923,6 +2923,7 @@ static int lpuart_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); ret = lpuart_global_reset(sport); if (ret) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index c79dcd7c8d1a6032aed8ffc9bdaf7bd1623fd17a..b4c1798a1df2a079b4a398b7f8d38d45d3264578 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -327,6 +327,7 @@ struct sc16is7xx_one { struct kthread_work reg_work; struct kthread_delayed_work ms_work; struct sc16is7xx_one_config config; + unsigned char buf[SC16IS7XX_FIFO_SIZE]; /* Rx buffer. */ unsigned int old_mctrl; u8 old_lcr; /* Value before EFR access. */ bool irda_mode; @@ -340,7 +341,6 @@ struct sc16is7xx_port { unsigned long gpio_valid_mask; #endif u8 mctrl_mask; - unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; struct sc16is7xx_one p[]; @@ -592,6 +592,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_MCR_CLKSEL_BIT, prescaler == 1 ? 0 : SC16IS7XX_MCR_CLKSEL_BIT); + mutex_lock(&one->efr_lock); + /* Backup LCR and access special register set (DLL/DLH) */ lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, @@ -606,24 +608,26 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Restore LCR and access to general register set */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&one->efr_lock); + return DIV_ROUND_CLOSEST((clk / prescaler) / 16, div); } static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, unsigned int iir) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lsr = 0, bytes_read, i; bool read_lsr = (iir == SC16IS7XX_IIR_RLSE_SRC) ? true : false; u8 ch, flag; - if (unlikely(rxlen >= sizeof(s->buf))) { + if (unlikely(rxlen >= sizeof(one->buf))) { dev_warn_ratelimited(port->dev, "ttySC%i: Possible RX FIFO overrun: %d\n", port->line, rxlen); port->icount.buf_overrun++; /* Ensure sanity of RX level */ - rxlen = sizeof(s->buf); + rxlen = sizeof(one->buf); } while (rxlen) { @@ -636,10 +640,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, lsr = 0; if (read_lsr) { - s->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); + one->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); bytes_read = 1; } else { - sc16is7xx_fifo_read(port, s->buf, rxlen); + sc16is7xx_fifo_read(port, one->buf, rxlen); bytes_read = rxlen; } @@ -672,7 +676,7 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, } for (i = 0; i < bytes_read; ++i) { - ch = s->buf[i]; + ch = one->buf[i]; if (uart_handle_sysrq_char(port, ch)) continue; @@ -690,10 +694,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, static void sc16is7xx_handle_tx(struct uart_port *port) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct tty_port *tport = &port->state->port; unsigned long flags; unsigned int txlen; + unsigned char *tail; if (unlikely(port->x_char)) { sc16is7xx_port_write(port, SC16IS7XX_THR_REG, port->x_char); @@ -718,8 +722,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) txlen = 0; } - txlen = uart_fifo_out(port, s->buf, txlen); - sc16is7xx_fifo_write(port, s->buf, txlen); + txlen = kfifo_out_linear_ptr(&tport->xmit_fifo, &tail, txlen); + sc16is7xx_fifo_write(port, tail, txlen); + uart_xmit_advance(port, txlen); uart_port_lock_irqsave(port, &flags); if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9a18d0b95a41909d2dfdc758ae464b611aecf4d7..5bea3af46abcef688d72f1bcb19074a53003f0ec 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -881,6 +881,14 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, new_flags = (__force upf_t)new_info->flags; old_custom_divisor = uport->custom_divisor; + if (!(uport->flags & UPF_FIXED_PORT)) { + unsigned int uartclk = new_info->baud_base * 16; + /* check needs to be done here before other settings made */ + if (uartclk == 0) { + retval = -EINVAL; + goto exit; + } + } if (!capable(CAP_SYS_ADMIN)) { retval = -EPERM; if (change_irq || change_port || diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 53f8c2329c30c8763c562ab85e79988640b58349..14f8f00fdcf9ab499466175ee7762b34de8ed352 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -770,8 +770,6 @@ static void sysrq_of_get_keyreset_config(void) { u32 key; struct device_node *np; - struct property *prop; - const __be32 *p; np = of_find_node_by_path("/chosen/linux,sysrq-reset-seq"); if (!np) { @@ -782,7 +780,7 @@ static void sysrq_of_get_keyreset_config(void) /* Reset in case a __weak definition was present */ sysrq_reset_seq_len = 0; - of_property_for_each_u32(np, "keyset", prop, p, key) { + of_property_for_each_u32(np, "keyset", key) { if (key == KEY_RESERVED || key > KEY_MAX || sysrq_reset_seq_len == SYSRQ_KEY_RESET_MAX) break; diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index dc2177fec7156e5f10e65581f34be64077af620e..a931fcde7ad98d1e9703037ecb0a71761f4ffb5d 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -76,8 +76,7 @@ static void addpair(int fp, int un) int main(int argc, char *argv[]) { FILE *ctbl; - const char *tblname, *rel_tblname; - const char *abs_srctree; + const char *tblname; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -102,16 +101,6 @@ int main(int argc, char *argv[]) } } - abs_srctree = getenv("abs_srctree"); - if (abs_srctree && !strncmp(abs_srctree, tblname, strlen(abs_srctree))) - { - rel_tblname = tblname + strlen(abs_srctree); - while (*rel_tblname == '/') - ++rel_tblname; - } - else - rel_tblname = tblname; - /* For now we assume the default font is always 256 characters. */ fontlen = 256; @@ -255,16 +244,13 @@ int main(int argc, char *argv[]) printf("\ /*\n\ - * Do not edit this file; it was automatically generated by\n\ - *\n\ - * conmakehash %s > [this file]\n\ - *\n\ + * Automatically generated file; Do not edit.\n\ */\n\ \n\ #include \n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", rel_tblname, fontlen); +{\n\t", fontlen); for ( i = 0 ; i < fontlen ; i++ ) { diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index ce36154ce9638aa616a392ce19356e774e0effae..7aea8fbaeee8822290686567401e502e9365e853 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -316,6 +316,11 @@ static inline int ufshcd_rpm_get_sync(struct ufs_hba *hba) return pm_runtime_get_sync(&hba->ufs_device_wlun->sdev_gendev); } +static inline int ufshcd_rpm_get_if_active(struct ufs_hba *hba) +{ + return pm_runtime_get_if_active(&hba->ufs_device_wlun->sdev_gendev); +} + static inline int ufshcd_rpm_put_sync(struct ufs_hba *hba) { return pm_runtime_put_sync(&hba->ufs_device_wlun->sdev_gendev); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index dc757ba47522b0997815e4159366282c7798076a..0b3d0c8e0ddaed063e085bc869ccb8d39e67a3a5 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2416,7 +2416,17 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) return err; } + /* + * The UFSHCI 3.0 specification does not define MCQ_SUPPORT and + * LSDB_SUPPORT, but [31:29] as reserved bits with reset value 0s, which + * means we can simply read values regardless of version. + */ hba->mcq_sup = FIELD_GET(MASK_MCQ_SUPPORT, hba->capabilities); + /* + * 0h: legacy single doorbell support is available + * 1h: indicate that legacy single doorbell support has been removed + */ + hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); if (!hba->mcq_sup) return 0; @@ -4090,11 +4100,16 @@ static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba) min_sleep_time_us = MIN_DELAY_BEFORE_DME_CMDS_US - delta; else - return; /* no more delay required */ + min_sleep_time_us = 0; /* no more delay required */ } - /* allow sleep for extra 50us if needed */ - usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + if (min_sleep_time_us > 0) { + /* allow sleep for extra 50us if needed */ + usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + } + + /* update the last_dme_cmd_tstamp */ + hba->last_dme_cmd_tstamp = ktime_get(); } /** @@ -6553,7 +6568,8 @@ again: if (ufshcd_err_handling_should_stop(hba)) goto skip_err_handling; - if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) { + if ((hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) && + !hba->force_reset) { bool ret; spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -8211,7 +8227,10 @@ static void ufshcd_update_rtc(struct ufs_hba *hba) */ val = ts64.tv_sec - hba->dev_info.rtc_time_baseline; - ufshcd_rpm_get_sync(hba); + /* Skip update RTC if RPM state is not RPM_ACTIVE */ + if (ufshcd_rpm_get_if_active(hba) <= 0) + return; + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_SECONDS_PASSED, 0, 0, &val); ufshcd_rpm_put_sync(hba); @@ -10265,9 +10284,6 @@ int ufshcd_system_restore(struct device *dev) */ ufshcd_readl(hba, REG_UTP_TASK_REQ_LIST_BASE_H); - /* Resuming from hibernate, assume that link was OFF */ - ufshcd_set_link_off(hba); - return 0; } @@ -10496,6 +10512,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } if (!is_mcq_supported(hba)) { + if (!hba->lsdb_sup) { + dev_err(hba->dev, "%s: failed to initialize (legacy doorbell mode not supported)\n", + __func__); + err = -EINVAL; + goto out_disable; + } err = scsi_add_host(host, hba->dev); if (err) { dev_err(hba->dev, "scsi_add_host failed\n"); diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 16ad3528d80b2315e8b3ad82d0c1eafff9f9829e..9ec318ef52bff2dbc897ecb1268793ea14c5e2ce 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1293,6 +1293,9 @@ static void exynos_ufs_fmp_resume(struct ufs_hba *hba) { struct arm_smccc_res res; + if (!(hba->caps & UFSHCD_CAP_CRYPTO)) + return; + arm_smccc_smc(SMC_CMD_FMP_SECURITY, 0, SMU_EMBEDDED, CFG_DESCTYPE_3, 0, 0, 0, 0, &res); if (res.a0) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d8b096859337951f69dacaa14f72044f8faa9a20..e0ceaa72194937cdac8ca91de55b9d3c230b0108 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3734,11 +3734,9 @@ static int ffs_func_set_alt(struct usb_function *f, if (alt > MAX_ALT_SETTINGS) return -EINVAL; - if (alt != (unsigned)-1) { - intf = ffs_func_revmap_intf(func, interface); - if (intf < 0) - return intf; - } + intf = ffs_func_revmap_intf(func, interface); + if (intf < 0) + return intf; if (ffs->func) ffs_func_eps_disable(ffs->func); @@ -3753,12 +3751,6 @@ static int ffs_func_set_alt(struct usb_function *f, if (ffs->state != FFS_ACTIVE) return -ENODEV; - if (alt == (unsigned)-1) { - ffs->func = NULL; - ffs_event_add(ffs, FUNCTIONFS_DISABLE); - return 0; - } - ffs->func = func; ret = ffs_func_eps_enable(func); if (ret >= 0) { @@ -3770,7 +3762,23 @@ static int ffs_func_set_alt(struct usb_function *f, static void ffs_func_disable(struct usb_function *f) { - ffs_func_set_alt(f, 0, (unsigned)-1); + struct ffs_function *func = ffs_func_from_usb(f); + struct ffs_data *ffs = func->ffs; + + if (ffs->func) + ffs_func_eps_disable(ffs->func); + + if (ffs->state == FFS_DEACTIVATED) { + ffs->state = FFS_CLOSING; + INIT_WORK(&ffs->reset_work, ffs_reset_work); + schedule_work(&ffs->reset_work); + return; + } + + if (ffs->state == FFS_ACTIVE) { + ffs->func = NULL; + ffs_event_add(ffs, FUNCTIONFS_DISABLE); + } } static int ffs_func_setup(struct usb_function *f, diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 38e8ed3144f0603ce69cb33fa488a36ebdb4e388..3f63253ad3e074e7baf7f95d07173e627893e6cc 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -642,12 +642,21 @@ static void process_ump_stream_msg(struct f_midi2_ep *ep, const u32 *data) if (format) return; // invalid blk = (*data >> 8) & 0xff; - if (blk >= ep->num_blks) - return; - if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) - reply_ump_stream_fb_info(ep, blk); - if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) - reply_ump_stream_fb_name(ep, blk); + if (blk == 0xff) { + /* inquiry for all blocks */ + for (blk = 0; blk < ep->num_blks; blk++) { + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } + } else if (blk < ep->num_blks) { + /* only the specified block */ + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } return; } } diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 89af0feb75120864665a7c8004dfd7e66cb010f0..24299576972fe1d758bf1e9f39c63a0c3eb1fe3e 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -592,16 +592,25 @@ int u_audio_start_capture(struct g_audio *audio_dev) struct usb_ep *ep, *ep_fback; struct uac_rtd_params *prm; struct uac_params *params = &audio_dev->params; - int req_len, i; + int req_len, i, ret; prm = &uac->c_prm; dev_dbg(dev, "start capture with rate %d\n", prm->srate); ep = audio_dev->out_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for out_ep failed (%d)\n", ret); + return ret; + } + req_len = ep->maxpacket; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for out_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { @@ -629,9 +638,18 @@ int u_audio_start_capture(struct g_audio *audio_dev) return 0; /* Setup feedback endpoint */ - config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed in_ep_fback failed (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } + prm->fb_ep_enabled = true; - usb_ep_enable(ep_fback); + ret = usb_ep_enable(ep_fback); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep_fback (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } req_len = ep_fback->maxpacket; req_fback = usb_ep_alloc_request(ep_fback, GFP_ATOMIC); @@ -687,13 +705,17 @@ int u_audio_start_playback(struct g_audio *audio_dev) struct uac_params *params = &audio_dev->params; unsigned int factor; const struct usb_endpoint_descriptor *ep_desc; - int req_len, i; + int req_len, i, ret; unsigned int p_pktsize; prm = &uac->p_prm; dev_dbg(dev, "start playback with rate %d\n", prm->srate); ep = audio_dev->in_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for in_ep failed (%d)\n", ret); + return ret; + } ep_desc = ep->desc; /* @@ -720,7 +742,11 @@ int u_audio_start_playback(struct g_audio *audio_dev) uac->p_residue_mil = 0; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index eec7f7a2e40f08bb2e002c9014a21136439d756c..b394105e55d6cc783c8dc93bfa90e991d122519b 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1441,6 +1441,7 @@ void gserial_suspend(struct gserial *gser) spin_lock(&port->port_lock); spin_unlock(&serial_port_lock); port->suspended = true; + port->start_delayed = true; spin_unlock_irqrestore(&port->port_lock, flags); } EXPORT_SYMBOL_GPL(gserial_suspend); diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index b0a6137584141cc13559304a911028312a542453..cf6478f97f4a3db1784d016a8b0d2610bd2891ad 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -118,12 +118,10 @@ int usb_ep_enable(struct usb_ep *ep) goto out; /* UDC drivers can't handle endpoints with maxpacket size 0 */ - if (usb_endpoint_maxp(ep->desc) == 0) { - /* - * We should log an error message here, but we can't call - * dev_err() because there's no way to find the gadget - * given only ep. - */ + if (!ep->desc || usb_endpoint_maxp(ep->desc) == 0) { + WARN_ONCE(1, "%s: ep%d (%s) has %s\n", __func__, ep->address, ep->name, + (!ep->desc) ? "NULL descriptor" : "maxpacket 0"); + ret = -EINVAL; goto out; } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d7654f475dafbf45f82577fed04368b4dae0f067..937ce5fd58095b2efbd610d9453ab52541366af7 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b7517c3c8059f4afb00e270bcbb288a67fdcffab..4ea2c3e072a9e3d6f77e48f6154c3a3f9bb9a848 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2910,6 +2910,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + return 0; check_endpoint_halted: if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code)) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0a8cf6c17f827adb28c74b3ac80d4e013748a488..efdf4c228b8c0ace31ac8d956fbb1466971fe976 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, xhci->num_active_eps); return -ENOMEM; } - if ((xhci->quirks & XHCI_SW_BW_CHECKING) && + if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) { if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) xhci_free_host_resources(xhci, ctrl_ctx); @@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); - if (!ret) - xhci_alloc_dev(hcd, udev); + if (!ret) { + if (xhci_alloc_dev(hcd, udev) == 1) + xhci_setup_addressable_virt_dev(xhci, udev); + } kfree(command->completion); kfree(command); return -EPROTO; diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 2d30fc1be30669c81c7208a50ae5b4fa8f841fca..1a8d5e80b9aec2daf3977fee7c0d873efaea6ba7 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_gpio_hids[] = { { "INTC1096" }, { "INTC100B" }, { "INTC10D1" }, + { "INTC10B5" }, {}, }; diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index b98cda1cef7322da4f6825d4b297d522ddfed876..e24cdb667307802b9eee856e20744ebf694395e8 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -382,11 +382,9 @@ static void usb251xb_get_ports_field(struct usb251xb *hub, bool ds_only, u8 *fld) { struct device *dev = hub->dev; - struct property *prop; - const __be32 *p; u32 port; - of_property_for_each_u32(dev->of_node, prop_name, prop, p, port) { + of_property_for_each_u32(dev->of_node, prop_name, port) { if ((port >= ds_only ? 1 : 0) && (port <= port_cnt)) *fld |= BIT(port); else diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 612bea504d7a877aefcb2cf8413112ed7371183b..0870c6533f8012055a27082df59c4a2e4c5f79fa 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -863,4 +863,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Winchiphead CH341 USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 670e942fdaaa18d93b223f23640dbef5594da22e..6d6ec7eed87c921218670bb2c68b18677b15af2d 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -104,7 +104,7 @@ struct garmin_packet { int seq; /* the real size of the data array, always > 0 */ int size; - __u8 data[]; + __u8 data[] __counted_by(size); }; /* structure used to keep the current state of the driver */ @@ -267,8 +267,7 @@ static int pkt_add(struct garmin_data *garmin_data_p, /* process only packets containing data ... */ if (data_length) { - pkt = kmalloc(sizeof(struct garmin_packet)+data_length, - GFP_ATOMIC); + pkt = kmalloc(struct_size(pkt, data, data_length), GFP_ATOMIC); if (!pkt) return 0; diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index 1f7bb3e4fcf22d1d11ee44f6f1dcbb609110bb6c..942cb01534232e91d0a0d0cc34e77edcd51d5621 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -1315,4 +1315,5 @@ module_usb_serial_driver(serial_drivers, mxuport_idtable); MODULE_AUTHOR("Andrew Lunn "); MODULE_AUTHOR(""); +MODULE_DESCRIPTION("Moxa UPORT USB Serial driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index 20277c52dded6b117f296bd7ee5e12a59155d2f5..82791fd67c468ecaa7f2df82e470f0035f3229ec 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -112,4 +112,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Navman USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/qcaux.c b/drivers/usb/serial/qcaux.c index 929ffba663f2035fbaf1f46c8bb3de9495fa4daa..015bb7c5d19d3bda50cdeb9535ea220e562577aa 100644 --- a/drivers/usb/serial/qcaux.c +++ b/drivers/usb/serial/qcaux.c @@ -84,4 +84,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Qualcomm USB Auxiliary Serial Port driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 09a972a838ee4c18b85f10008af2a08ab69342fe..6b294bf8bc435fd539a147b934297d45cd7c6adb 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -49,16 +49,6 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); -struct spcp8x5_usb_ctrl_arg { - u8 type; - u8 cmd; - u8 cmd_type; - u16 value; - u16 index; - u16 length; -}; - - /* spcp8x5 spec register define */ #define MCR_CONTROL_LINE_RTS 0x02 #define MCR_CONTROL_LINE_DTR 0x01 diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index d7f73ad6e778f9997ce47bc5237e4c25dc409adb..9aabb087f733cb03a119b58706d42782ed166001 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -190,4 +190,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Symbol USB barcode to serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 24b8772a345e2f898dcc345b9f01582b1a87e76a..82f4f0b992aaa76d387df54d915b895f6aea0699 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -163,4 +163,5 @@ static const struct usb_device_id id_table[] = { MODULE_DEVICE_TABLE(usb, id_table); module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("USB Serial 'Simple' driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index 6934970f180d7fd32249da35d8c380a3a64b9463..61a8425b7762a471fb642039b1156d87a801aea4 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -76,6 +76,11 @@ static void usb_debug_process_read_urb(struct urb *urb) usb_serial_generic_process_read_urb(urb); } +static void usb_debug_init_termios(struct tty_struct *tty) +{ + tty->termios.c_lflag &= ~(ECHO | ECHONL); +} + static struct usb_serial_driver debug_device = { .driver = { .owner = THIS_MODULE, @@ -85,6 +90,7 @@ static struct usb_serial_driver debug_device = { .num_ports = 1, .bulk_out_size = USB_DEBUG_MAX_PACKET_SIZE, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -96,6 +102,7 @@ static struct usb_serial_driver dbc_device = { .id_table = dbc_id_table, .num_ports = 1, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -104,4 +111,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table_combined); +MODULE_DESCRIPTION("USB Debug cable driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/typec/mux/fsa4480.c b/drivers/usb/typec/mux/fsa4480.c index cb7cdf90cb0aaa4184602ed2dab3f4e4382901ea..cd235339834b0b8aa0492e9eb194e283dc5b0958 100644 --- a/drivers/usb/typec/mux/fsa4480.c +++ b/drivers/usb/typec/mux/fsa4480.c @@ -13,6 +13,10 @@ #include #include +#define FSA4480_DEVICE_ID 0x00 + #define FSA4480_DEVICE_ID_VENDOR_ID GENMASK(7, 6) + #define FSA4480_DEVICE_ID_VERSION_ID GENMASK(5, 3) + #define FSA4480_DEVICE_ID_REV_ID GENMASK(2, 0) #define FSA4480_SWITCH_ENABLE 0x04 #define FSA4480_SWITCH_SELECT 0x05 #define FSA4480_SWITCH_STATUS1 0x07 @@ -251,6 +255,7 @@ static int fsa4480_probe(struct i2c_client *client) struct typec_switch_desc sw_desc = { }; struct typec_mux_desc mux_desc = { }; struct fsa4480 *fsa; + int val = 0; int ret; fsa = devm_kzalloc(dev, sizeof(*fsa), GFP_KERNEL); @@ -268,6 +273,15 @@ static int fsa4480_probe(struct i2c_client *client) if (IS_ERR(fsa->regmap)) return dev_err_probe(dev, PTR_ERR(fsa->regmap), "failed to initialize regmap\n"); + ret = regmap_read(fsa->regmap, FSA4480_DEVICE_ID, &val); + if (ret || !val) + return dev_err_probe(dev, -ENODEV, "FSA4480 not found\n"); + + dev_dbg(dev, "Found FSA4480 v%lu.%lu (Vendor ID = %lu)\n", + FIELD_GET(FSA4480_DEVICE_ID_VERSION_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_REV_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_VENDOR_ID, val)); + /* Safe mode */ fsa->cur_enable = FSA4480_ENABLE_DEVICE | FSA4480_ENABLE_USB; fsa->mode = TYPEC_STATE_SAFE; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index b862fdf3fe1d4c6c35ec60ad5eae8af84a002cad..3e3dcb983ddeabbd457294788bcb9e0a5453da2f 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -67,7 +67,7 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) return regmap_raw_write(tcpci->regmap, reg, &val, sizeof(u16)); } -static bool tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) +static int tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) { unsigned int reg; int ret; diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 26f9006e95e169c2dca496cfe9a13753075e4562..4b02d6474259109c3ccfe662c96434f0d1ad55c2 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4515,7 +4515,7 @@ static inline enum tcpm_state hard_reset_state(struct tcpm_port *port) return ERROR_RECOVERY; if (port->pwr_role == TYPEC_SOURCE) return SRC_UNATTACHED; - if (port->state == SNK_WAIT_CAPABILITIES) + if (port->state == SNK_WAIT_CAPABILITIES_TIMEOUT) return SNK_READY; return SNK_UNATTACHED; } @@ -5655,7 +5655,6 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index ea768b19a7f1ec1da902b07ec447668b2bd4849d..dd51a25480bfb90572976a4b3ecbdc1291790a78 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1191,14 +1191,14 @@ static int tps6598x_apply_patch(struct tps6598x *tps) dev_info(tps->dev, "Firmware update succeeded\n"); release_fw: - release_firmware(fw); if (ret) { dev_err(tps->dev, "Failed to write patch %s of %zu bytes\n", firmware_name, fw->size); } + release_firmware(fw); return ret; -}; +} static int cd321x_init(struct tps6598x *tps) { diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dcd3765cc1f532242af91ad1af5389b63adb87f0..4039851551c1b7251b5ae5ec1402092b55575073 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -137,7 +137,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (ret) return ret; - return err; + return err ?: UCSI_CCI_LENGTH(*cci); } static int ucsi_read_error(struct ucsi *ucsi, u8 connector_num) @@ -238,13 +238,10 @@ static int ucsi_send_command_common(struct ucsi *ucsi, u64 cmd, mutex_lock(&ucsi->ppm_lock); ret = ucsi_run_command(ucsi, cmd, &cci, data, size, conn_ack); - if (cci & UCSI_CCI_BUSY) { - ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false); - return ret ? ret : -EBUSY; - } - - if (cci & UCSI_CCI_ERROR) - return ucsi_read_error(ucsi, connector_num); + if (cci & UCSI_CCI_BUSY) + ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false) ?: -EBUSY; + else if (cci & UCSI_CCI_ERROR) + ret = ucsi_read_error(ucsi, connector_num); mutex_unlock(&ucsi->ppm_lock); return ret; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 82650c11e4516125777dc55e4719e398fbbb644c..302a89aeb258a54211406ceb89cc89fdcbcf5519 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -745,6 +745,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag * */ if (usb_pipedevice(urb->pipe) == 0) { + struct usb_device *old; __u8 type = usb_pipetype(urb->pipe); struct usb_ctrlrequest *ctrlreq = (struct usb_ctrlrequest *) urb->setup_packet; @@ -755,14 +756,15 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto no_need_xmit; } + old = vdev->udev; switch (ctrlreq->bRequest) { case USB_REQ_SET_ADDRESS: /* set_address may come when a device is reset */ dev_info(dev, "SetAddress Request (%d) to port %d\n", ctrlreq->wValue, vdev->rhport); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); spin_lock(&vdev->ud.lock); vdev->ud.status = VDEV_ST_USED; @@ -781,8 +783,8 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag usbip_dbg_vhci_hc( "Not yet?:Get_Descriptor to device 0 (get max pipe size)\n"); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); goto out; default: @@ -1067,6 +1069,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) static void vhci_device_reset(struct usbip_device *ud) { struct vhci_device *vdev = container_of(ud, struct vhci_device, ud); + struct usb_device *old = vdev->udev; unsigned long flags; spin_lock_irqsave(&ud->lock, flags); @@ -1074,8 +1077,8 @@ static void vhci_device_reset(struct usbip_device *ud) vdev->speed = 0; vdev->devid = 0; - usb_put_dev(vdev->udev); vdev->udev = NULL; + usb_put_dev(old); if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c index 7fa0491bb201eb320faa5e947b55035642af0112..11bd76ae18cf937c5dd25ec7c000abe7a4b5c130 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c @@ -140,7 +140,7 @@ static int octep_process_mbox(struct octep_hw *oct_hw, u16 id, u16 qid, void *bu val = octep_read_sig(mbox); if ((val & 0xFFFF) != MBOX_RSP_SIG) { dev_warn(&pdev->dev, "Invalid Signature from mbox : %d response\n", id); - return ret; + return -EINVAL; } val = octep_read_sts(mbox); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e31ec9ebc4ce842accc0ac2be8feb1a237c6e126..478cd46a49ede2cec23e38d6845e1e710dfa1bab 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1481,13 +1481,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) notify = ops->get_vq_notification(vdpa, index); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - PFN_DOWN(notify.addr), PAGE_SIZE, - vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } static const struct vm_operations_struct vhost_vdpa_vm_ops = { diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a9b93e99c23a411d66506d8d4a7b5d9547565962..bc1f962e483b985a2d9d94c993d32ba6126f1f44 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -305,15 +305,9 @@ static int virtio_dev_probe(struct device *_d) if (err) goto err; - if (dev->config->create_avq) { - err = dev->config->create_avq(dev); - if (err) - goto err; - } - err = drv->probe(dev); if (err) - goto err_probe; + goto err; /* If probe didn't do it, mark device DRIVER_OK ourselves. */ if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -326,9 +320,6 @@ static int virtio_dev_probe(struct device *_d) return 0; -err_probe: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -344,9 +335,6 @@ static void virtio_dev_remove(struct device *_d) drv->remove(dev); - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - /* Driver should have reset device. */ WARN_ON_ONCE(dev->config->get_status(dev)); @@ -524,9 +512,6 @@ int virtio_device_freeze(struct virtio_device *dev) } } - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - return 0; } EXPORT_SYMBOL_GPL(virtio_device_freeze); @@ -562,16 +547,10 @@ int virtio_device_restore(struct virtio_device *dev) if (ret) goto err; - if (dev->config->create_avq) { - ret = dev->config->create_avq(dev); - if (ret) - goto err; - } - if (drv->restore) { ret = drv->restore(dev); if (ret) - goto err_restore; + goto err; } /* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@ -582,9 +561,6 @@ int virtio_device_restore(struct virtio_device *dev) return 0; -err_restore: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7d82facafd75ee98f8867e4d8b16ad5052da4eb2..c44d8ba00c02c38f9d641b8ccf423efbf86ea83b 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -46,12 +46,26 @@ bool vp_notify(struct virtqueue *vq) return true; } +/* Notify all slow path virtqueues on an interrupt. */ +static void vp_vring_slow_path_interrupt(int irq, + struct virtio_pci_device *vp_dev) +{ + struct virtio_pci_vq_info *info; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->slow_virtqueues, node) + vring_interrupt(irq, info->vq); + spin_unlock_irqrestore(&vp_dev->lock, flags); +} + /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); + vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } @@ -125,6 +139,9 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL)) goto error; + if (!per_vq_vectors) + desc = NULL; + if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ @@ -171,11 +188,17 @@ error: return err; } +static bool vp_is_slow_path_vector(u16 msix_vec) +{ + return msix_vec == VP_MSIX_CONFIG_VECTOR; +} + static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, - u16 msix_vec) + u16 msix_vec, + struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); @@ -194,13 +217,16 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); + if (!vp_is_slow_path_vector(msix_vec)) + list_add(&info->node, &vp_dev->virtqueues); + else + list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } - vp_dev->vqs[index] = info; + *p_info = info; return vq; out_info: @@ -236,13 +262,11 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->is_avq && vp_dev->is_avq(vdev, vq->index)) - continue; - if (vp_dev->per_vq_vectors) { int v = vp_dev->vqs[vq->index]->msix_vector; - if (v != VIRTIO_MSI_NO_VECTOR) { + if (v != VIRTIO_MSI_NO_VECTOR && + !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); @@ -284,21 +308,85 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->vqs = NULL; } +enum vp_vq_vector_policy { + VP_VQ_VECTOR_POLICY_EACH, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, + VP_VQ_VECTOR_POLICY_SHARED, +}; + +static struct virtqueue * +vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, + vq_callback_t *callback, const char *name, bool ctx, + bool slow_path, int *allocated_vectors, + enum vp_vq_vector_policy vector_policy, + struct virtio_pci_vq_info **p_info) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq; + u16 msix_vec; + int err; + + if (!callback) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || + (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && + !slow_path)) + msix_vec = (*allocated_vectors)++; + else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && + slow_path) + msix_vec = VP_MSIX_CONFIG_VECTOR; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, + p_info); + if (IS_ERR(vq)) + return vq; + + if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || + msix_vec == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(msix_vec)) + return vq; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), + "%s-%s", dev_name(&vp_dev->vdev.dev), name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], vq); + if (err) { + vp_del_vq(vq); + return ERR_PTR(err); + } + + return vq; +} + static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], - bool per_vq_vectors, + enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; - u16 msix_vec; int i, err, nvectors, allocated_vectors, queue_idx = 0; + struct virtqueue *vq; + bool per_vq_vectors; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto error_find; + } + + per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; + if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -307,13 +395,14 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, if (vqi->name && vqi->callback) ++nvectors; } + if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) + ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, - per_vq_vectors ? desc : NULL); + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; @@ -325,37 +414,27 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, vqs[i] = NULL; continue; } - - if (!vqi->callback) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, - vqi->name, vqi->ctx, msix_vec); + vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, + vqi->name, vqi->ctx, false, + &allocated_vectors, vector_policy, + &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } + } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), vqi->name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, + avq->name, false, true, &allocated_vectors, + vector_policy, &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto error_find; } + return 0; error_find: @@ -368,12 +447,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; + struct virtqueue *vq; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto out_del_vqs; + } + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) @@ -390,13 +478,24 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, - VIRTIO_MSI_NO_VECTOR); + VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, + false, VIRTIO_MSI_NO_VECTOR, + &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto out_del_vqs; + } + return 0; out_del_vqs: vp_del_vqs(vdev); @@ -411,11 +510,20 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, true, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_EACH, desc); + if (!err) + return 0; + /* Fallback: MSI-X with one shared vector for config and + * slow path queues, one vector per queue for the rest. + */ + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, false, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ @@ -466,7 +574,8 @@ const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || - vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR) + vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, @@ -574,6 +683,7 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); + INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 3c4bb2d6163a92f918923a8cef65a901fb3c7e1f..1d9c49947f52d1b6e46057bbf9f0f71216f307b5 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -35,7 +35,7 @@ struct virtio_pci_vq_info { /* the actual virtqueue */ struct virtqueue *vq; - /* the list node for the virtqueues list */ + /* the list node for the virtqueues or slow_virtqueues list */ struct list_head node; /* MSI-X vector (or none) */ @@ -44,9 +44,9 @@ struct virtio_pci_vq_info { struct virtio_pci_admin_vq { /* Virtqueue info associated with this admin queue. */ - struct virtio_pci_vq_info info; - /* serializing admin commands execution and virtqueue deletion */ - struct mutex cmd_lock; + struct virtio_pci_vq_info *info; + /* Protects virtqueue access. */ + spinlock_t lock; u64 supported_cmds; /* Name of the admin queue: avq.$vq_index. */ char name[10]; @@ -66,9 +66,12 @@ struct virtio_pci_device { /* Where to read and clear interrupt */ u8 __iomem *isr; - /* a list of queues so we can dispatch IRQs */ + /* Lists of queues and potentially slow path queues + * so we can dispatch IRQs. + */ spinlock_t lock; struct list_head virtqueues; + struct list_head slow_virtqueues; /* Array of all virtqueues reported in the * PCI common config num_queues field @@ -102,7 +105,7 @@ struct virtio_pci_device { void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); - bool (*is_avq)(struct virtio_device *vdev, unsigned int index); + int (*avq_index)(struct virtio_device *vdev, u16 *index, u16 *num); }; /* Constants for MSI-X */ @@ -175,6 +178,7 @@ struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev); #define VIRTIO_ADMIN_CMD_BITMAP 0 #endif +void vp_modern_avq_done(struct virtqueue *vq); int vp_modern_admin_cmd_exec(struct virtio_device *vdev, struct virtio_admin_cmd *cmd); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 3b5b9499a53a658be513cdbe6ebc26805d699f44..9193c30d640aeb91b6bcd6bccc97bedda2418239 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -28,6 +28,21 @@ static u64 vp_get_features(struct virtio_device *vdev) return vp_modern_get_features(&vp_dev->mdev); } +static int vp_avq_index(struct virtio_device *vdev, u16 *index, u16 *num) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + *num = 0; + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return 0; + + *num = vp_modern_avq_num(&vp_dev->mdev); + if (!(*num)) + return -EINVAL; + *index = vp_modern_avq_index(&vp_dev->mdev); + return 0; +} + static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -38,17 +53,35 @@ static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) return index == vp_dev->admin_vq.vq_index; } +void vp_modern_avq_done(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&admin_vq->lock, flags); + do { + virtqueue_disable_cb(vq); + while ((cmd = virtqueue_get_buf(vq, &len))) + complete(&cmd->completion); + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&admin_vq->lock, flags); +} + static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, u16 opcode, struct scatterlist **sgs, unsigned int out_num, unsigned int in_num, - void *data) + struct virtio_admin_cmd *cmd) { struct virtqueue *vq; - int ret, len; + unsigned long flags; + int ret; - vq = admin_vq->info.vq; + vq = admin_vq->info->vq; if (!vq) return -EIO; @@ -57,21 +90,33 @@ static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, !((1ULL << opcode) & admin_vq->supported_cmds)) return -EOPNOTSUPP; - ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, data, GFP_KERNEL); - if (ret < 0) - return -EIO; + init_completion(&cmd->completion); - if (unlikely(!virtqueue_kick(vq))) +again: + if (virtqueue_is_broken(vq)) return -EIO; - while (!virtqueue_get_buf(vq, &len) && - !virtqueue_is_broken(vq)) - cpu_relax(); + spin_lock_irqsave(&admin_vq->lock, flags); + ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_KERNEL); + if (ret < 0) { + if (ret == -ENOSPC) { + spin_unlock_irqrestore(&admin_vq->lock, flags); + cpu_relax(); + goto again; + } + goto unlock_err; + } + if (!virtqueue_kick(vq)) + goto unlock_err; + spin_unlock_irqrestore(&admin_vq->lock, flags); - if (virtqueue_is_broken(vq)) - return -EIO; + wait_for_completion(&cmd->completion); - return 0; + return cmd->ret; + +unlock_err: + spin_unlock_irqrestore(&admin_vq->lock, flags); + return -EIO; } int vp_modern_admin_cmd_exec(struct virtio_device *vdev, @@ -122,12 +167,9 @@ int vp_modern_admin_cmd_exec(struct virtio_device *vdev, in_num++; } - mutex_lock(&vp_dev->admin_vq.cmd_lock); ret = virtqueue_exec_admin_cmd(&vp_dev->admin_vq, le16_to_cpu(cmd->opcode), - sgs, out_num, in_num, sgs); - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - + sgs, out_num, in_num, cmd); if (ret) { dev_err(&vdev->dev, "Failed to execute command on admin vq: %d\n.", ret); @@ -188,25 +230,29 @@ end: static void vp_modern_avq_activate(struct virtio_device *vdev) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_unbreak(admin_vq->info.vq); virtio_pci_admin_cmd_list_init(vdev); } -static void vp_modern_avq_deactivate(struct virtio_device *vdev) +static void vp_modern_avq_cleanup(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + struct virtqueue *vq; if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_break(admin_vq->info.vq); + vq = vp_dev->vqs[vp_dev->admin_vq.vq_index]->vq; + if (!vq) + return; + + while ((cmd = virtqueue_detach_unused_buf(vq))) { + cmd->ret = -EIO; + complete(&cmd->completion); + } } static void vp_transport_features(struct virtio_device *vdev, u64 features) @@ -403,7 +449,7 @@ static void vp_reset(struct virtio_device *vdev) while (vp_modern_get_status(mdev)) msleep(1); - vp_modern_avq_deactivate(vdev); + vp_modern_avq_cleanup(vdev); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); @@ -552,8 +598,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (index >= vp_modern_get_num_queues(mdev) && !is_avq) return ERR_PTR(-EINVAL); - num = is_avq ? - VIRTIO_AVQ_SGS_MAX : vp_modern_get_queue_size(mdev, index); + num = vp_modern_get_queue_size(mdev, index); /* Check if queue is either not available or already active. */ if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); @@ -580,12 +625,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, goto err; } - if (is_avq) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = vq; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - return vq; err: @@ -620,12 +659,6 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - if (vp_is_avq(&vp_dev->vdev, vq->index)) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = NULL; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - if (vp_dev->msix_enabled) vp_modern_queue_vector(mdev, vq->index, VIRTIO_MSI_NO_VECTOR); @@ -735,45 +768,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, return true; } -static int vp_modern_create_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *avq; - struct virtqueue *vq; - u16 admin_q_num; - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return 0; - - admin_q_num = vp_modern_avq_num(&vp_dev->mdev); - if (!admin_q_num) - return -EINVAL; - - avq = &vp_dev->admin_vq; - avq->vq_index = vp_modern_avq_index(&vp_dev->mdev); - sprintf(avq->name, "avq.%u", avq->vq_index); - vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL, - avq->name, NULL, VIRTIO_MSI_NO_VECTOR); - if (IS_ERR(vq)) { - dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld", - PTR_ERR(vq)); - return PTR_ERR(vq); - } - - vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true); - return 0; -} - -static void vp_modern_destroy_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return; - - vp_dev->del_vq(&vp_dev->admin_vq.info); -} - static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get = NULL, .set = NULL, @@ -792,8 +786,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -814,8 +806,6 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; /* the PCI probing function */ @@ -839,11 +829,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; - vp_dev->is_avq = vp_is_avq; + vp_dev->avq_index = vp_avq_index; vp_dev->isr = mdev->isr; vp_dev->vdev.id = mdev->id; - mutex_init(&vp_dev->admin_vq.cmd_lock); + spin_lock_init(&vp_dev->admin_vq.lock); return 0; } @@ -851,6 +841,5 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) { struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - mutex_destroy(&vp_dev->admin_vq.cmd_lock); vp_modern_remove(mdev); } diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index a97ceb105cd8dd02060afa539acd8ee6749357cb..24fdc74caeba47b0ba5d782f4a1e68175c00256e 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, err ?: total, false); } diff --git a/fs/afs/file.c b/fs/afs/file.c index c3f0c45ae9a9b6f6582accd6e9365862808fb7e1..ec1be0091fdb562d49a84dc14c2330c7265e9a65 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op) req->error = error; if (subreq) { - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, error ?: req->actual_len, false); req->subreq = NULL; } else if (req->done) { diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index a7b425d3c8a0e4f068fde469f2347b4a5edbb449..331a17f3f113b48630744591572d8f9e5feb04c7 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct posix_acl *acl = NULL; + + if (rcu) + return ERR_PTR(-ECHILD); + + struct btree_trans *trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 27e7eec0f278c63784ec9520ffc31c7fad8cd7eb..fe730a6bf0c18c00c5c3db93ede033af5a63d62c 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t); #ifdef CONFIG_BCACHEFS_POSIX_ACL -struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); +struct posix_acl *bch2_get_acl(struct inode *, int, bool); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708fb32f00f312c1f4f7301eb9810a2..fd3a2522bc3ed3eb56303a95681807a929cf7c3b 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); for (unsigned i = 0; i < 2; i++) bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", a.v->io_time[i], LRU_TIME_MAX); @@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->cached_sectors || a.v->stripe, - c, err, alloc_key_empty_but_have_data, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, a.v->dirty_sectors, @@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_parity: bkey_fsck_err_on(!a.v->dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_type_str(a.v->data_type)); break; @@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || stripe_sectors || a.v->stripe, - c, err, alloc_key_cached_inconsistency, + c, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: @@ -829,7 +824,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & BTREE_TRIGGER_gc)); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8d2b62c9588e738364da6aab03000f271604fd5a..fd790b03fbe1987af43d01e9ad210ec3d94a8e3e 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket, bucket_data_type(bucket) != bucket_data_type(ptr); } +/* + * It is my general preference to use unsigned types for unsigned quantities - + * however, these helpers are used in disk accounting calculations run by + * triggers where the output will be negated and added to an s64. unsigned is + * right out even though all these quantities will fit in 32 bits, since it + * won't be sign extended correctly; u64 will negate "correctly", but s64 is the + * simpler option here. + */ static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a) { return a.stripe_sectors + a.dirty_sectors + a.cached_sectors; @@ -142,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ @@ -166,8 +176,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, * avoid overflowing LRU_TIME_BITS on a corrupted fs, when * bucket_sectors_dirty is (much) bigger than bucket_size */ - u64 d = min(bch2_bucket_sectors_dirty(a), - ca->mi.bucket_size); + u64 d = min_t(s64, bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } @@ -232,52 +242,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 618d2ff0292ec397813c15cd2fa9f5c0fb2ced91..8563c2d268476e7f9f66457dfa62bbdb67747511 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope prt_newline(out); } -void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) +void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_dev *ca) { struct open_bucket *ob; @@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); - if (ob->valid && !ob->on_partial_list) + if (ob->valid && !ob->on_partial_list && + (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } @@ -1738,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - bch2_dev_usage_to_text(out, &stats); + bch2_dev_usage_to_text(out, ca, &stats); prt_newline(out); @@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); } -void bch2_print_allocator_stuck(struct bch_fs *c) +static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n"); + prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", + c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); printbuf_indent_add(&buf, 2); @@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c) bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } + +static inline unsigned allocator_wait_timeout(struct bch_fs *c) +{ + if (c->allocator_last_stuck && + time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies)) + return 0; + + return c->opts.allocator_stuck_timeout * HZ; +} + +void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + unsigned t = allocator_wait_timeout(c); + + if (t && closure_sync_timeout(cl, t)) { + c->allocator_last_stuck = jiffies; + bch2_print_allocator_stuck(c); + } + + closure_sync(cl); +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 6da9e7e29026f20a6a43f251dfcc042bb67f3596..386d231ceca3f667f8871d32e540ca47c1fafc44 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); -void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); +void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); @@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *); void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *); -void bch2_print_allocator_stuck(struct bch_fs *); +void __bch2_wait_on_allocator(struct bch_fs *, struct closure *); +static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + if (cl->closure_get_happened) + __bch2_wait_on_allocator(c, cl); +} #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a9828e81ad787132b364c74484700000..d4da6343efa9bd028a9aca93223b6d261105a9fd 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; @@ -763,27 +761,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98de9879c04564db4865c8a2b1748b..7daecadb764e3eb9afa2a3e6862ac063a156c21b 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91361a167dcd6329ed10ac7c791311d7412f21db..0c7086e00d18f55cb2cc19e6b598f1782a72d255 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ @@ -893,6 +894,8 @@ struct bch_fs { struct bch_fs_usage_base __percpu *usage; u64 __percpu *online_reserved; + unsigned long allocator_last_stuck; + struct io_clock io_clock[2]; /* JOURNAL SEQ BLACKLIST */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 74a60b1a4ddfa70f7353b7c99584b0a216d4b7c3..c75f2e0f32bb9696aed79075744351db6d7511e3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,7 +675,9 @@ struct bch_sb_field_ext { x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ - x(disk_accounting_v2, BCH_VERSION(1, 9)) + x(disk_accounting_v2, BCH_VERSION(1, 9)) \ + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -836,6 +838,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0fb580dac9431d8bebfb89850bb78..e34cb2bf329c54f4b297a6f4493b4f992a989c04 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c75622c2bb22957da5d3034090c75..88d8958281e80f8e8840917ab3f43e0f4a7684ef 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb60c7db294bd4d64018da281f97e9..3df3dd2723a128f8ae11148e5d5ffeb01fe04edf 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a9479b346f2f64bd650b41f6111fec08..eb3002c4eae7b1615c8a16b17b60e119fca7a8c1 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4aaa531e9148e66d6d610a01bbce4d..56ea9a77cd4afdfb2c72e8878d60e068c291494a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1767,6 +1744,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { @@ -1952,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 36872207f09b22ac597d18b87e18ece7b07a92fc..2e84d22e17bdd616b681625d4b022e6d4e2a3dda 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; @@ -1921,6 +1922,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bch2_trans_verify_not_in_restart(trans); bch2_btree_iter_verify(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (ret) + goto err; + + struct btree_path *path = btree_iter_path(trans, iter); /* already at end? */ diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c093647a1a7ade9dd0dfc6a458e0d..dca62375d7d30fd1eef504655c56520dc6d03cec 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -600,23 +600,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b578b33f592c492c8bab89971ef63..79954490627ccaa7416a27950482f1f8d5347a9f 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1afc2a21c628763422c2a2a314f69..51d6289b8dee3205ee5e8f8292906a588c92d1c0 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,27 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; +} + +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; } int bch2_btree_key_cache_journal_flush(struct journal *, diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 0011072263778b3bfab0a870e0f62b486711ca98..b28c649c68389fb5d261019002ba55175a2afe07 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9b44bd887de63c3bbd1aca2f52202..a0101d9c5d83f6b8a9f06a07d4215cb36d85a651 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -927,7 +883,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); @@ -973,9 +929,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; @@ -1060,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1084,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 31ee50184be2369c110c4e6d9352d6ecd6ffd09f..b3454d4619e8fb5366f114ecfa741a196c57c533 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } @@ -1364,18 +1364,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2650a0d246636959caf83b2f27445d47aec8094c..be2bbd2486314f12c1348892358495d5e5b82f6d 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +void bch2_dev_usage_to_text(struct printbuf *out, + struct bch_dev *ca, + struct bch_dev_usage *usage) { prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); for (unsigned i = 0; i < BCH_DATA_NR; i++) { bch2_prt_data_type(out, i); prt_printf(out, "\t%llu\r%llu\r%llu\r\n", - usage->d[i].buckets, - usage->d[i].sectors, - usage->d[i].fragmented); + usage->d[i].buckets, + usage->d[i].sectors, + usage->d[i].fragmented); } + + prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); } static int bch2_check_fix_ptr(struct btree_trans *trans, @@ -806,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans, ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); if (ret) return ret; + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + replicas_sectors, + }; + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); + if (ret) + return ret; } if (bch2_bkey_rebalance_opts(k)) { @@ -897,7 +915,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -907,7 +924,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", @@ -1028,13 +1045,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1045,7 +1067,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2d35eeb24a2d693e5117b2ebd11d0cf07a338490..edbdffd508fc3b0f1c78a47b33d554b043bc3ce7 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d075d1c2b6a9dd2b610a5ba8f274c..f70eb2127d322c4bd1adf14c15f902a1ffd7fab5 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,7 +106,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { @@ -115,7 +115,14 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead3c20459ac5bbaaf883e4c389901e..6a854c9184965e6754f3378a440a52c3193b4a4d 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -250,10 +250,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5eac2fe665094f1070e65de5e891c..45aec1afdb0e31195ffc76a19cc9d048669be299 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); - - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308ef768a400100534cd7371872ce9b5..32bfdf19289a9dc4789ade6da8e9348dcb82afa9 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a0948d0cb3cc6c78e1f530cd3f79e6e..8945145865c53b4b46ee82d86b55689f5fed6ab4 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index dcdd59249c23eb5f896772da06f5d87a55a74fd4..e972e2bca546a9335320948287b1cc1b6c7b9f60 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,11 +114,73 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +static inline bool is_zero(char *start, char *end) { - return 0; + BUG_ON(start > end); + + for (; start < end; start++) + if (*start) + return false; + return true; +} + +#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) +{ + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_nr_inodes: + end = field_end(acc_k, nr_inodes); + break; + case BCH_DISK_ACCOUNTING_persistent_reserved: + end = field_end(acc_k, persistent_reserved); + break; + case BCH_DISK_ACCOUNTING_replicas: + bkey_fsck_err_on(!acc_k.replicas.nr_devs, + c, accounting_key_replicas_nr_devs_0, + "accounting key replicas entry with nr_devs=0"); + + bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || + (acc_k.replicas.nr_required > 1 && + acc_k.replicas.nr_required == acc_k.replicas.nr_devs), + c, accounting_key_replicas_nr_required_bad, + "accounting key replicas entry with bad nr_required"); + + for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], + c, accounting_key_replicas_devs_unsorted, + "accounting key replicas entry with unsorted devs"); + + end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + end = field_end(acc_k, dev_data_type); + break; + case BCH_DISK_ACCOUNTING_compression: + end = field_end(acc_k, compression); + break; + case BCH_DISK_ACCOUNTING_snapshot: + end = field_end(acc_k, snapshot); + break; + case BCH_DISK_ACCOUNTING_btree: + end = field_end(acc_k, btree); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + end = field_end(acc_k, rebalance_work); + break; + } + + bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), + c, accounting_key_junk_at_end, + "junk at end of accounting key"); +fsck_err: + return ret; } void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k) @@ -465,6 +527,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -501,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -530,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && @@ -697,6 +762,15 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -712,9 +786,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69614fd4fd817072db53ec8eb85202..f29fd0dd9581fb9b70cf4201c553b4bf6d0faa7f 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ @@ -107,41 +106,20 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx; - - EBUG_ON(gc && !acc->gc_running); - - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); - if (ret) - return ret; - } - - struct accounting_mem_entry *e = &acc->k.data[idx]; - - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); - - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) - this_cpu_add(e->v[gc][i], a.v->d[i]); - return 0; -} - /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -162,13 +140,31 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - return __bch2_accounting_mem_mod(c, a, gc); + struct bch_accounting_mem *acc = &c->accounting; + unsigned idx; + + EBUG_ON(gc && !acc->gc_running); + + while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { + int ret = bch2_accounting_mem_insert(c, a, gc); + if (ret) + return ret; + } + + struct accounting_mem_entry *e = &acc->k.data[idx]; + + EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + + for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + this_cpu_add(e->v[gc][i], a.v->d[i]); + return 0; } static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index cba417060b333111c7ee027cd24e32074c4c8e88..7b6e6c97e6aa6a989e593aec2e671cf65749ce9a 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -124,20 +125,23 @@ struct bch_dev_data_type { __u8 data_type; }; -struct bch_dev_stripe_buckets { - __u8 dev; -}; - struct bch_acct_compression { __u8 type; }; struct bch_acct_snapshot { __u32 id; -}; +} __packed; struct bch_acct_btree { __u32 id; +} __packed; + +struct bch_acct_inum { + __u64 inum; +} __packed; + +struct bch_acct_rebalance_work { }; struct disk_accounting_pos { @@ -149,12 +153,13 @@ struct disk_accounting_pos { struct bch_persistent_reserved persistent_reserved; struct bch_replicas_entry_v1 replicas; struct bch_dev_data_type dev_data_type; - struct bch_dev_stripe_buckets dev_stripe_buckets; struct bch_acct_compression compression; struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; - }; - }; + struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; + } __packed; + } __packed; struct bpos _pad; }; }; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 9b5b5c9a6c638cd63f84fb25b97c9643f1c9affa..141a4c63142f5bceb8b70a8a58973b91000d01a5 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -1809,6 +1808,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); BUG_ON(v->nr_redundant != h->s->nr_parity); + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); if (i < h->s->nr_data) @@ -2235,6 +2237,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) mutex_unlock(&c->ec_stripes_heap_lock); } +static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c, + struct ec_stripe_new *s) +{ + prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs", + s->idx, s->nr_data, s->nr_parity, + bitmap_weight(s->blocks_allocated, s->nr_data), + atomic_read(&s->ref[STRIPE_REF_io]), + atomic_read(&s->ref[STRIPE_REF_stripe]), + bch2_watermarks[s->h->watermark]); + + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i; + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) + prt_printf(out, " %u", s->blocks[i]); + prt_newline(out); +} + void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) { struct ec_stripe_head *h; @@ -2247,23 +2266,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) bch2_watermarks[h->watermark]); if (h->s) - prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", - h->s->idx, h->s->nr_data, h->s->nr_parity, - bitmap_weight(h->s->blocks_allocated, - h->s->nr_data)); + bch2_new_stripe_to_text(out, c, h->s); } mutex_unlock(&c->ec_stripe_head_lock); prt_printf(out, "in flight:\n"); mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", - s->idx, s->nr_data, s->nr_parity, - atomic_read(&s->ref[STRIPE_REF_io]), - atomic_read(&s->ref[STRIPE_REF_stripe]), - bch2_watermarks[s->h->watermark]); - } + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb62495e1d4f9d620fff8c654dbbbd9b2a..90962b3c0130572a75e0d333d3ee70c0fb4e29e8 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bfd839018992649ef4cca2f5ca9f11..ab5a7adece1042738942a03543af5691d523db91 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820033eaa3753cf111705eebd24b23d..95afa7bf20205c653bc2dd7cde3188d00fe2ce12 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad8098074ca18ee96404ac4c4847b9..2f1b86978f3660e1b001504b8b403e951c30e0c9 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include #include +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35fb72aa20ba1de1a2f327a089745a6..4419ad3e454e4f4fe9eacf0bbed6f028895eae6d 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1102,14 +1099,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1123,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1158,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1192,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1244,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec849c90899d5cc33e910d194001f6..1a6ddee48041d13f3aa0e4483d454373141d02de 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3a5f49affa0a96a185df20084ffd7589fc6b945d..94c392abef65cdda64624c9eeec5c56875611e1e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new @@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1f5cba5ac73caf13fb540243e68f6..2be6be33afa3e0d068bf514ee7890187fb70bf62 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a7450990f80bef2e61ffeccb31921d56a95..f1fcb4c58039a5f6ee0a9185a6d3fd8065944399 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 2cf6297756f8e1eab9b87fb3fbe806a0604510a5..177ed331c00b1d4a05d5eb523e430732b251dd8d 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -126,11 +126,7 @@ err_noprint: if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); - - if (closure_sync_timeout(&cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&cl); - } + bch2_wait_on_allocator(c, &cl); } return ret; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4531c9ab3e12675738bde6af7d08351b073ebb45..7ee3b75480df307dffc0921bf31df71e0a1757c1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_slots); retry: + bch2_trans_begin(trans); rbio->bio.bi_status = 0; k = bch2_btree_iter_peek_slot(&iter); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d31c8d006d979138821a0223dbf6aa9bf0645bd1..1d4761d150023af0c1a9dca31b765c00683e901a 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1503,10 +1503,7 @@ err: if ((op->flags & BCH_WRITE_SYNC) || (!(op->flags & BCH_WRITE_SUBMITTED) && !(op->flags & BCH_WRITE_IN_WORKER))) { - if (closure_sync_timeout(&op->cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&op->cl); - } + bch2_wait_on_allocator(c, &op->cl); __bch2_write_index(op); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63a11489c0cab0014d29e297c60436..7664b68e6a15ee0c54aa023fed8d15c6980365da 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb3710bc5f4f7b007d4e2d52103beb08f..96f2f4f8c39787e79fc5f831d9a12edd4f0dae95 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11e52fb70362cf60745ffcc7e342b7..e6a7d8241bb807bb658f79681f7a8fef114952cc 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 60b93018501f9814c3504ab43bf24d7d82e5c42f..cda1725702eaf7aafa74258cdfbb952583058b23 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -391,6 +391,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ + x(allocator_stuck_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U16_MAX), \ + BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \ + NULL, "Default timeout in seconds for stuck allocator messages")\ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0ae173e8f10496c1d679028969743f..c32a05e252e2abcbcc073db148741b531d11dfe8 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218a80f5688a0eb36061db32a14e4f1..a62abcc5332ad399c2022eb155e41106539c7ea8 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525a233b36719a2ed7c4ba502c9fc75..e59c0abb47723bd2eb2b55a09f365061d0063586 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67ac6225fc2edecc8de0d538520aa4e..51afe11d8ed65b3159ad1a39aebe879970d36647 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 10c96cb2047ae1e71a9d3a36ad9b13e6858f5361..1223b710755dac27b39b2bc48684e60beca93215 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index dfbbd33c8731b64ad62803f37f93ddd79e15eb54..650a1f77ca4036f5b03fb8fa8d9b9b8a323f1add 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -61,6 +61,20 @@ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_bkey_version_in_future, \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ + BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ @@ -79,6 +93,21 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_fs_usage_hidden_wrong, \ + BCH_FSCK_ERR_fs_usage_btree_wrong, \ + BCH_FSCK_ERR_fs_usage_data_wrong, \ + BCH_FSCK_ERR_fs_usage_cached_wrong, \ + BCH_FSCK_ERR_fs_usage_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ + BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d1b2f2aa397a7111ac63ac3b47fbc1cfba096f46..d3a498617303e680ea30dda3400bdc672ea205ca 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -287,7 +287,11 @@ enum bch_fsck_flags { x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ - x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ + x(accounting_key_junk_at_end, 277, 0) \ + x(accounting_key_replicas_nr_devs_0, 278, 0) \ + x(accounting_key_replicas_nr_required_bad, 279, 0) \ + x(accounting_key_replicas_devs_unsorted, 280, 0) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5bea192c9d535728714444b4f6918..8b18a9b483a4df8a608b2c366586857e74a3ac72 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e96288e8dd525fab7f65e9eb56020990..eb5ef64221d6e11b9adb47fc0f3ddabedb729914 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b558626a8eb8cf71823942fee0a4654993..dbe834cb349f4a8e6a1e72d24fafab3acf1186d4 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb252db7304d62287f1fdc0bbfc89f6..a8299ba2cab2cefa23450e7e8c277a32cc77383f 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8bc8198327904454da03e34a9da8931d7f92d5f0..c8c2ccbdfbb5891faa1de7e2c5ca2a50724a93ec 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 && + !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb)) + SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30); } for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0455a1001fec2bbc7675f8a4bfbcd8459fa5d358..e7fa2de35014596e0a471248063f245305717f0f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); - kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1c0d1fb20276aa801fd00fac8d75a991ea94612c..f393023a3ae29acf9f3c3f8309336c5f59ec4ccd 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -367,7 +367,7 @@ SHOW(bch2_fs) bch2_stripes_heap_to_text(out, c); if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c); + bch2_open_buckets_to_text(out, c, NULL); if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); @@ -811,6 +811,9 @@ SHOW(bch2_dev) if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c, ca); + return 0; } @@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, + &sysfs_open_buckets, NULL }; diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b49c4ed7af877921dd7e2b446d75a8..dfad1d06633ddb5d5487c28009c2a276f95b5d18 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb4d9433f4556042b83ad8bc956663..c62f00322d1ed6778106f215cbe783aaaf5c36c7 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c767006e97fa5ac00b29b192e0ecd..f2b4c17a0307df035241ccaf7f581b4b2336b3d0 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c850566039596aac97a7ba600609a3f..c188a5ad64cef6b69dd20f0c9794c42bae24d71b 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c26545d71d39a3a548925c1fa65da7c356a3d81c..cd6d5bbb4b9df5e0d9d95ad634ae599f6c171b00 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -72,8 +72,10 @@ #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET #define DATA_START_OFFSET_WORDS (0) +#define MAX_SHARED_LIBS_UPDATE (0) #else #define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS) +#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS) #endif struct lib_info { @@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm) return res; /* Update data segment pointers for all libraries */ - for (i = 0; i < MAX_SHARED_LIBS; i++) { + for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) { if (!libinfo.lib_list[i].loaded) continue; for (j = 0; j < MAX_SHARED_LIBS; j++) { diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 498442d0c2163dd2570db2b64c9175b4daf61c0e..2e49d978f504edd1d1d8e54b2c995d885dac23ef 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c8568b1a61c4322317d7e8a66a641a1fdcb6da2d..75fa563e4cacb8b3df391fda8015140748e67ef5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,7 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 2ac9296edccb7c9bd56f7f8d65953374dc80dc26..06a9e0542d708b92f9bb790715bfa384e2aa4f7f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(delayed_refs, bytenr, false); } +static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent) +{ + int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY; + + if (type < entry->type) + return -1; + if (type > entry->type) + return 1; + + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (root < entry->ref_root) + return -1; + if (root > entry->ref_root) + return 1; + } else { + if (parent < entry->parent) + return -1; + if (parent > entry->parent) + return 1; + } + return 0; +} + +/* + * Check to see if a given root/parent reference is attached to the head. This + * only checks for BTRFS_ADD_DELAYED_REF references that match, as that + * indicates the reference exists for the given root or parent. This is for + * tree blocks only. + * + * @head: the head of the bytenr we're searching. + * @root: the root objectid of the reference if it is a normal reference. + * @parent: the parent if this is a shared backref. + */ +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent) +{ + struct rb_node *node; + bool found = false; + + lockdep_assert_held(&head->mutex); + + spin_lock(&head->lock); + node = head->ref_tree.rb_root.rb_node; + while (node) { + struct btrfs_delayed_ref_node *entry; + int ret; + + entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); + ret = find_comp(entry, root, parent); + if (ret < 0) { + node = node->rb_left; + } else if (ret > 0) { + node = node->rb_right; + } else { + /* + * We only want to count ADD actions, as drops mean the + * ref doesn't exist. + */ + if (entry->action == BTRFS_ADD_DELAYED_REF) + found = true; + break; + } + } + spin_unlock(&head->lock); + return found; +} + void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ef15e998be03509b4081cac1626c1549468f2402..05f634eb472d84d419e1f8409e765544a0021f95 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent); static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) { diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index f9fb2db6a1e4ad29125ae78d09cfa67f9890e2bb..67adbe9d294aecabce353c00cb5cb06bae7b7cba 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -856,21 +856,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -897,10 +913,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d77498e7671c724e7aae5907c41453dab04aa6a0..feec49e6f9c80115d3e5f180e5ff8a12024f60dd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); @@ -5471,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 parent, int level) { + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *head; struct btrfs_path *path; struct btrfs_extent_inline_ref *iref; int ret; + bool exists = false; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: ret = lookup_extent_backref(trans, path, &iref, bytenr, root->fs_info->nodesize, parent, btrfs_root_id(root), level, 0); + if (ret != -ENOENT) { + /* + * If we get 0 then we found our reference, return 1, else + * return the error if it's not -ENOENT; + */ + btrfs_free_path(path); + return (ret < 0 ) ? ret : 1; + } + + /* + * We could have a delayed ref with this reference, so look it up while + * we're holding the path open to make sure we don't race with the + * delayed ref running. + */ + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); + if (!head) + goto out; + if (!mutex_trylock(&head->mutex)) { + /* + * We're contended, means that the delayed ref is running, get a + * reference and wait for the ref head to be complete and then + * try again. + */ + refcount_inc(&head->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref_head(head); + goto again; + } + + exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent); + mutex_unlock(&head->mutex); +out: + spin_unlock(&delayed_refs->lock); btrfs_free_path(path); - if (ret == -ENOENT) - return 0; - if (ret < 0) - return ret; - return 1; + return exists ? 1 : 0; } /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aa7f8148cd0d06ff0707308e7f49632e3306eac1..c73cd4f89015f692dc2c81e90b22c78926b2daeb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, free_extent_map(em); em = NULL; + /* + * Although the PageDirty bit might be cleared before entering + * this function, subpage dirty bit is not cleared. + * So clear subpage dirty bit here so next time we won't submit + * page for range already written to disk. + */ + btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); btrfs_set_range_writeback(inode, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(inode->root->fs_info, @@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, page->index, cur, end); } - /* - * Although the PageDirty bit is cleared before entering this - * function, subpage dirty bit is not cleared. - * So clear subpage dirty bit here so next time we won't submit - * page for range already written to disk. - */ - btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); submit_extent_page(bio_ctrl, disk_bytenr, page, iosize, cur - page_offset(page)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81558f90ee80fa5c05d5811ab5e79017e6bdb0ec..10ac5f657e388981a40ccc5dad7d4420570b9091 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, start_diff = start - em->start; em->start = start; em->len = end - start; - if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em)) + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) em->offset += start_diff; return add_extent_mapping(inode, em, 0); } @@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c return 0; /* - * We want to be fast because we can be called from any path trying to - * allocate memory, so if the lock is busy we don't want to spend time + * We want to be fast so if the lock is busy we don't want to spend time * waiting for it - either some task is about to do IO for the inode or * we may have another task shrinking extent maps, here in this code, so * skip this inode. @@ -1191,9 +1190,7 @@ next: /* * Stop if we need to reschedule or there's contention on the * lock. This is to avoid slowing other tasks trying to take the - * lock and because the shrinker might be called during a memory - * allocation path and we want to avoid taking a very long time - * and slowing down all sorts of tasks. + * lock. */ if (need_resched() || rwlock_needbreak(&tree->lock)) break; @@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx if (ctx->scanned >= ctx->nr_to_scan) break; - /* - * We may be called from memory allocation paths, so we don't - * want to take too much time and slowdown tasks. - */ - if (need_resched()) - break; + cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) ctx.last_ino); } - /* - * We may be called from memory allocation paths, so we don't want to - * take too much time and slowdown tasks, so stop if we need reschedule. - */ - while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { + while (ctx.scanned < ctx.nr_to_scan) { struct btrfs_root *root; unsigned long count; + cond_resched(); + spin_lock(&fs_info->fs_roots_radix_lock); count = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)&root, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 21381de906f6073e32a9f360e456da900dbe4d47..9914419f3b7d6105cd16ac013b62d0c831fce4e8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&inode->i_mmap_lock); + else + btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } @@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); @@ -1857,7 +1868,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3f9b7507543ad4b0cd7cb7e6e4e16bc1dacf6f22..eaa1dbd313528cf33ea44f7c2bb5cde0701d70ca 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + bool initial; u64 reclaimable_unusable; - WARN_ON(!initial && offset + size > block_group->zone_capacity); + spin_lock(&block_group->lock); + initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + WARN_ON(!initial && offset + size > block_group->zone_capacity); if (!initial) bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); - spin_lock(&ctl->tree_lock); if (!used) to_free = size; else if (initial) @@ -2718,18 +2719,19 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_free = offset + size - block_group->alloc_offset; to_unusable = size - to_free; + spin_lock(&ctl->tree_lock); ctl->free_space += to_free; + spin_unlock(&ctl->tree_lock); /* * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; - spin_unlock(&ctl->tree_lock); + WARN_ON(block_group->zone_unusable > block_group->length); + } if (!used) { - spin_lock(&block_group->lock); block_group->alloc_offset -= size; - spin_unlock(&block_group->lock); } reclaimable_unusable = block_group->zone_unusable - @@ -2743,6 +2745,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, btrfs_mark_bg_to_reclaim(block_group); } + spin_unlock(&block_group->lock); + return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01eab695564727be83101c4b9eecd1a43c1fc908..b1b6564ab68f0ca7ef9ff4d729c07203d3dc00e9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -714,8 +714,9 @@ out: return ret; } -static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, - u64 end, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, + struct page *locked_page, + u64 offset, u64 end, size_t compressed_size, int compress_type, struct folio *compressed_folio, @@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, return ret; } - extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached, + if (ret == 0) + locked_page = NULL; + + extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached, clear_flags, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1043,10 +1047,10 @@ again: * extent for the subpage case. */ if (total_in < actual_end) - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, NULL, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); else - ret = cow_file_range_inline(inode, start, end, total_compressed, + ret = cow_file_range_inline(inode, NULL, start, end, total_compressed, compress_type, folios[0], false); if (ret <= 0) { if (ret < 0) @@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (!no_inline) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, locked_page, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); if (ret <= 0) { /* @@ -1581,6 +1585,7 @@ out_unlock: locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1594,6 +1599,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } return ret; } @@ -2255,6 +2261,7 @@ error: EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); return ret; @@ -4188,6 +4195,7 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); + inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); ret = btrfs_update_inode(trans, dir); @@ -5660,7 +5668,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; @@ -7198,6 +7206,12 @@ static void wait_subpage_spinlock(struct page *page) spin_unlock_irq(&subpage->lock); } +static int btrfs_launder_folio(struct folio *folio) +{ + return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), + PAGE_SIZE, NULL); +} + static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) { if (try_release_extent_mapping(&folio->page, gfp_flags)) { @@ -10133,6 +10147,7 @@ static const struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readahead = btrfs_readahead, .invalidate_folio = btrfs_invalidate_folio, + .launder_folio = btrfs_launder_folio, .release_folio = btrfs_release_folio, .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32dcea662da38e6a6d404216edcae712d42132c9..fc821aa446f02f31a099635307959ecff22f72c5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,7 +14,7 @@ struct root_name_map { u64 id; - char name[16]; + const char *name; }; static const struct root_name_map root_map[] = { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 14a8d710001838501623672997ee07b1e956f298..0de9162ff48138b4c3d7d96130f33ce808a3c841 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static u32 stripe_length(const struct scrub_stripe *stripe) +{ + ASSERT(stripe->bg); + + return min(BTRFS_STRIPE_LEN, + stripe->bg->start + stripe->bg->length - stripe->logical); +} + static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1871,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = stripe->physical + stripe_length(stripe); + spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: @@ -2139,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ + spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; + spin_unlock(&sctx->stat_lock); ret = 0; break; } @@ -2336,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, + physical_end); + spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ca711a773efa9f3823d36f5096503761f2f01fc..619fa0b8b3f6f9818e72971ea7e1d42b4b298310 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -347,7 +347,7 @@ struct name_cache_entry { int ret; int need_later_update; int name_len; - char name[]; + char name[] __counted_by(name_len); }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ @@ -6157,25 +6157,51 @@ static int send_write_or_clone(struct send_ctx *sctx, u64 offset = key->offset; u64 end; u64 bs = sctx->send_root->fs_info->sectorsize; + struct btrfs_file_extent_item *ei; + u64 disk_byte; + u64 data_offset; + u64 num_bytes; + struct btrfs_inode_info info = { 0 }; end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); if (offset >= end) return 0; - if (clone_root && IS_ALIGNED(end, bs)) { - struct btrfs_file_extent_item *ei; - u64 disk_byte; - u64 data_offset; + num_bytes = end - offset; - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); - data_offset = btrfs_file_extent_offset(path->nodes[0], ei); - ret = clone_range(sctx, path, clone_root, disk_byte, - data_offset, offset, end - offset); - } else { - ret = send_extent_data(sctx, path, offset, end - offset); - } + if (!clone_root) + goto write_data; + + if (IS_ALIGNED(end, bs)) + goto clone_data; + + /* + * If the extent end is not aligned, we can clone if the extent ends at + * the i_size of the inode and the clone range ends at the i_size of the + * source inode, otherwise the clone operation fails with -EINVAL. + */ + if (end != sctx->cur_inode_size) + goto write_data; + + ret = get_inode_info(clone_root->root, clone_root->ino, &info); + if (ret < 0) + return ret; + + if (clone_root->offset + num_bytes == info.size) + goto clone_data; + +write_data: + ret = send_extent_data(sctx, path, offset, num_bytes); + sctx->cur_inode_next_write_offset = end; + return ret; + +clone_data: + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); + data_offset = btrfs_file_extent_offset(path->nodes[0], ei); + ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset, + num_bytes); sctx->cur_inode_next_write_offset = end; return ret; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 9ac94d3119e8ddd5fea47652a6114955c2ec9142..68e14fd486384e3d47236f4c40d28501fcc8ccd6 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); @@ -583,8 +583,7 @@ again: spin_lock(&cache->lock); avail = cache->length - cache->used - cache->pinned - - cache->reserved - cache->delalloc_bytes - - cache->bytes_super - cache->zone_unusable; + cache->reserved - cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", cache->start, cache->length, cache->used, cache->pinned, diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 4db8a0267c1606fe7b671ff349962ef18eb4317b..88b44221ce9782304fc4c7ead17c49fd593e451b 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 08d33cb372fbcb55d2e33388314cd1e78961b6eb..98fa0f382480a2a51420d586b1e2a2fa6c58d025 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "messages.h" #include "delayed-inode.h" #include "ctree.h" @@ -683,8 +684,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { - if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) + if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); + btrfs_warn(info, +"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); + } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } @@ -2398,7 +2402,13 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro trace_btrfs_extent_map_shrinker_count(fs_info, nr); - return nr; + /* + * Only report the real number for DEBUG builds, as there are reports of + * serious performance degradation caused by too frequent shrinks. + */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) + return nr; + return 0; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) @@ -2406,6 +2416,15 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan); struct btrfs_fs_info *fs_info = btrfs_sb(sb); + /* + * We may be called from any task trying to allocate memory and we don't + * want to slow it down with scanning and dropping extent maps. It would + * also cause heavy lock contention if many tasks concurrently enter + * here. Therefore only allow kswapd tasks to scan and drop extent maps. + */ + if (!current_is_kswapd()) + return 0; + return btrfs_free_extent_maps(fs_info, nr_to_scan); } diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index ebec4ab361b82ac58e57414a3ec323034202d4f4..56e61ac1cc64c87d4dde3e6ddf92a4daaef650d3 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -900,6 +900,102 @@ out: return ret; } +/* + * Test a regression for compressed extent map adjustment when we attempt to + * add an extent map that is partially overlapped by another existing extent + * map. The resulting extent map offset was left unchanged despite having + * incremented its start offset. + */ +static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + int ret; + int ret2; + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + return -ENOMEM; + } + + /* Compressed extent for the file range [120K, 128K). */ + em->start = SZ_1K * 120; + em->len = SZ_8K; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_8K; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [120K, 128K)"); + goto out; + } + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + ret = -ENOMEM; + goto out; + } + + /* + * Compressed extent for the file range [108K, 144K), which overlaps + * with the [120K, 128K) we previously inserted. + */ + em->start = SZ_1K * 108; + em->len = SZ_1K * 36; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_1K * 36; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + + /* + * Try to add the extent map but with a search range of [140K, 144K), + * this should succeed and adjust the extent map to the range + * [128K, 144K), with a length of 16K and an offset of 20K. + * + * This simulates a scenario where in the subvolume tree of an inode we + * have a compressed file extent item for the range [108K, 144K) and we + * have an overlapping compressed extent map for the range [120K, 128K), + * which was created by an encoded write, but its ordered extent was not + * yet completed, so the subvolume tree doesn't have yet the file extent + * item for that range - we only have the extent map in the inode's + * extent map tree. + */ + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [108K, 144K)"); + goto out; + } + + if (em->start != SZ_128K) { + test_err("unexpected extent map start %llu (should be 128K)", em->start); + ret = -EINVAL; + goto out; + } + if (em->len != SZ_16K) { + test_err("unexpected extent map length %llu (should be 16K)", em->len); + ret = -EINVAL; + goto out; + } + if (em->offset != SZ_1K * 20) { + test_err("unexpected extent map offset %llu (should be 20K)", em->offset); + ret = -EINVAL; + goto out; + } +out: + ret2 = free_extent_map_tree(inode); + if (ret == 0) + ret = ret2; + + return ret; +} + struct rmap_test_vector { u64 raid_type; u64 physical_start; @@ -1076,6 +1172,9 @@ int btrfs_test_extent_map(void) if (ret) goto out; ret = test_case_7(fs_info, BTRFS_I(inode)); + if (ret) + goto out; + ret = test_case_8(fs_info, BTRFS_I(inode)); if (ret) goto out; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6388786fd8b5d315be4a5f94a0172c79421fdd52..634d69964fe4c1aa8caa6d71ab06ada779cf226d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -569,9 +569,10 @@ static int check_dir_item(struct extent_buffer *leaf, /* dir type check */ dir_type = btrfs_dir_ftype(leaf, di); - if (unlikely(dir_type >= BTRFS_FT_MAX)) { + if (unlikely(dir_type <= BTRFS_FT_UNKNOWN || + dir_type >= BTRFS_FT_MAX)) { dir_item_err(leaf, slot, - "invalid dir item type, have %u expect [0, %u)", + "invalid dir item type, have %u expect (0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; } @@ -634,7 +635,7 @@ static int check_dir_item(struct extent_buffer *leaf, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); @@ -1289,6 +1290,19 @@ static void extent_err(const struct extent_buffer *eb, int slot, va_end(args); } +static bool is_valid_dref_root(u64 rootid) +{ + /* + * The following tree root objectids are allowed to have a data backref: + * - subvolume trees + * - data reloc tree + * - tree root + * For v1 space cache + */ + return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || + rootid == BTRFS_ROOT_TREE_OBJECTID; +} + static int check_extent_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) @@ -1441,6 +1455,8 @@ static int check_extent_item(struct extent_buffer *leaf, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u64 seq; + u64 dref_root; + u64 dref_objectid; u64 dref_offset; u64 inline_offset; u8 inline_type; @@ -1484,11 +1500,26 @@ static int check_extent_item(struct extent_buffer *leaf, */ case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); + dref_root = btrfs_extent_data_ref_root(leaf, dref); + dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); seq = hash_extent_data_ref( btrfs_extent_data_ref_root(leaf, dref), btrfs_extent_data_ref_objectid(leaf, dref), btrfs_extent_data_ref_offset(leaf, dref)); + if (unlikely(!is_valid_dref_root(dref_root))) { + extent_err(leaf, slot, + "invalid data ref root value %llu", + dref_root); + return -EUCLEAN; + } + if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID || + dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid data ref objectid value %llu", + dref_root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, @@ -1627,6 +1658,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { + u64 root; + u64 objectid; u64 offset; /* @@ -1634,7 +1667,22 @@ static int check_extent_data_ref(struct extent_buffer *leaf, * overflow from the leaf due to hash collisions. */ dref = (struct btrfs_extent_data_ref *)ptr; + root = btrfs_extent_data_ref_root(leaf, dref); + objectid = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); + if (unlikely(!is_valid_dref_root(root))) { + extent_err(leaf, slot, + "invalid extent data backref root value %llu", + root); + return -EUCLEAN; + } + if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID || + objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid extent data backref objectid value %llu", + root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", @@ -1716,6 +1764,72 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf, return 0; } +static int check_dev_extent_item(const struct extent_buffer *leaf, + const struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) +{ + struct btrfs_dev_extent *de; + const u32 sectorsize = leaf->fs_info->sectorsize; + + de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + /* Basic fixed member checks. */ + if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != + BTRFS_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk tree id, has %llu expect %llu", + btrfs_dev_extent_chunk_tree(leaf, de), + BTRFS_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk objectid, has %llu expect %llu", + btrfs_dev_extent_chunk_objectid(leaf, de), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + /* Alignment check. */ + if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent key.offset, has %llu not aligned to %u", + key->offset, sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent chunk offset, has %llu not aligned to %u", + btrfs_dev_extent_chunk_objectid(leaf, de), + sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent length, has %llu not aligned to %u", + btrfs_dev_extent_length(leaf, de), sectorsize); + return -EUCLEAN; + } + /* Overlap check with previous dev extent. */ + if (slot && prev_key->objectid == key->objectid && + prev_key->type == key->type) { + struct btrfs_dev_extent *prev_de; + u64 prev_len; + + prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); + prev_len = btrfs_dev_extent_length(leaf, prev_de); + if (unlikely(prev_key->offset + prev_len > key->offset)) { + generic_err(leaf, slot, + "dev extent overlap, prev offset %llu len %llu current offset %llu", + prev_key->objectid, prev_len, key->offset); + return -EUCLEAN; + } + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -1752,6 +1866,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(leaf, key, slot, prev_key); + break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8c16bc5250ef56cb8d0864d3b4b054466d57f250..c4744a02db753c761491b5bf4ca12977342db114 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) if (err >= 0) { if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); - if (err < subreq->len) + if (err < subreq->len && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { err = ceph_fscrypt_decrypt_extents(inode, @@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); if (subreq->start >= inode->i_size) @@ -424,6 +426,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) struct ceph_netfs_request_data *priv; int ret = 0; + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); + if (rreq->origin != NETFS_READAHEAD) return 0; @@ -498,6 +503,11 @@ const struct netfs_request_ops ceph_netfs_ops = { }; #ifdef CONFIG_CEPH_FSCACHE +static void ceph_set_page_fscache(struct page *page) +{ + folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ +} + static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) { struct inode *inode = priv; @@ -515,6 +525,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b ceph_fscache_write_terminated, inode, true, caching); } #else +static inline void ceph_set_page_fscache(struct page *page) +{ +} + static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { } @@ -706,6 +720,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = wlen; set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { @@ -789,6 +805,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) return AOP_WRITEPAGE_ACTIVATE; } + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ + err = writepage_nounlock(page, wbc); if (err == -ERESTARTSYS) { /* direct memory reclaimer was killed by SIGKILL. return 0 @@ -1062,7 +1080,8 @@ get_more_pages: unlock_page(page); break; } - if (PageWriteback(page)) { + if (PageWriteback(page) || + PagePrivate2(page) /* [DEPRECATED] */) { if (wbc->sync_mode == WB_SYNC_NONE) { doutc(cl, "%p under writeback\n", page); unlock_page(page); @@ -1070,6 +1089,7 @@ get_more_pages: } doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ } if (!clear_page_dirty_for_io(page)) { @@ -1254,6 +1274,8 @@ new_request: } set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); len += thp_size(page); } ceph_fscache_write_to_cache(inode, offset, len, caching); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c4941ba245ac3d0d3ae4e0f2598838b4ceb69ca9..808c9c04827683ab069e86c617b7b90441e46552 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2097,7 +2099,7 @@ retry: } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2105,7 +2107,8 @@ retry: ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may @@ -2180,6 +2183,11 @@ retry: queue_writeback = true; } + if (flags & CHECK_CAPS_FLUSH_FORCE) { + doutc(cl, "force to flush caps\n"); + goto ack; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -3067,10 +3075,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, flags, &_got); WARN_ON_ONCE(ret == -EAGAIN); if (!ret) { +#ifdef CONFIG_DEBUG_FS struct ceph_mds_client *mdsc = fsc->mdsc; struct cap_wait cw; +#endif DEFINE_WAIT_FUNC(wait, woken_wake_function); +#ifdef CONFIG_DEBUG_FS cw.ino = ceph_ino(inode); cw.tgid = current->tgid; cw.need = need; @@ -3079,6 +3090,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, spin_lock(&mdsc->caps_list_lock); list_add(&cw.list, &mdsc->cap_wait_list); spin_unlock(&mdsc->caps_list_lock); +#endif /* make sure used fmode not timeout */ ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS); @@ -3097,9 +3109,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, remove_wait_queue(&ci->i_cap_wq, &wait); ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS); +#ifdef CONFIG_DEBUG_FS spin_lock(&mdsc->caps_list_lock); list_del(&cw.list); spin_unlock(&mdsc->caps_list_lock); +#endif if (ret == -EAGAIN) continue; @@ -3504,6 +3518,8 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + bool revoke_wait = false; + int flags = 0; /* * If there is at least one crypto block then we'll trust @@ -3699,16 +3715,18 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && - (revoking & used & CEPH_CAP_FILE_BUFFER)) + (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ - else if (queue_invalidate && + revoke_wait = true; + } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) - ; /* do nothing yet, invalidation will be queued */ - else if (cap == ci->i_auth_cap) + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { + revoke_wait = true; /* do nothing yet, invalidation will be queued */ + } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ - else + } else { check_caps = 2; /* check all caps */ + } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; @@ -3735,8 +3753,9 @@ static void handle_cap_grant(struct inode *inode, BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3792,9 +3811,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5aadc56e0cc0ffb9fccf5404230eb3b306ab6ed8..18c72b305858d1c358d114a2304dfaf63e08ad79 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) } spin_lock(&mdsc->dentry_list_lock); - __dentry_dir_lease_touch(mdsc, di), + __dentry_dir_lease_touch(mdsc, di); spin_unlock(&mdsc->dentry_list_lock); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8f8de8f33abbfb5e1cc3f9114cfd7a74345e89a9..71cd70514efa5562bbee937b176f6431abd59363 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); spin_lock_init(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c2157f6e0c698219d2884461a3463bdf0216f970..276e34ab3e2ccaa40903196c9d1507fc49ed0bcb 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work) } mutex_unlock(&mdsc->mutex); + ceph_flush_cap_releases(mdsc, s); + mutex_lock(&s->s_mutex); if (renew_caps) send_renew_caps(mdsc, s); @@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; INIT_LIST_HEAD(&mdsc->cap_delay_list); +#ifdef CONFIG_DEBUG_FS INIT_LIST_HEAD(&mdsc->cap_wait_list); +#endif spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); INIT_LIST_HEAD(&mdsc->snap_flush_list); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index cfa18cf915a0772059931fc5e262c0184ce0a551..9bcc7f181bfe210c24f08dc3d3b9e4e79a4cc7fe 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -416,6 +416,8 @@ struct ceph_quotarealm_inode { struct inode *inode; }; +#ifdef CONFIG_DEBUG_FS + struct cap_wait { struct list_head list; u64 ino; @@ -424,6 +426,8 @@ struct cap_wait { int want; }; +#endif + enum { CEPH_MDSC_STOPPING_BEGIN = 1, CEPH_MDSC_STOPPING_FLUSHING = 2, @@ -512,7 +516,9 @@ struct ceph_mds_client { spinlock_t caps_list_lock; struct list_head caps_list; /* unused (reserved or unreserved) */ +#ifdef CONFIG_DEBUG_FS struct list_head cap_wait_list; +#endif int caps_total_count; /* total caps allocated */ int caps_use_count; /* in use */ int caps_use_max; /* max used caps */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 885cb5d4e771a1eddc4865b4c87773680f46b382..0cdf84cd179128d118c7e9a0d34287c27859698d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -961,7 +961,8 @@ static int __init init_caches(void) if (!ceph_mds_request_cachep) goto bad_mds_req; - ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, + (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *)); if (!ceph_wb_pagevec_pool) goto bad_pagevec_pool; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b63b4cd9b5b685a930bc33673f28e7c48a93d605..6e817bf1337c6539ef7ccf3341f7e633fcbf93bd 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 8be60797ea2fffdd91a27a28026a81a90c97e2e1..1b7eba38ba1e42998ce27d23545beb1c99671a9a 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -21,38 +21,32 @@ void erofs_put_metabuf(struct erofs_buf *buf) if (!buf->page) return; erofs_unmap_metabuf(buf); - put_page(buf->page); + folio_put(page_folio(buf->page)); buf->page = NULL; } -/* - * Derive the block size from inode->i_blkbits to make compatible with - * anonymous inode in fscache mode. - */ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, enum erofs_kmap_type type) { pgoff_t index = offset >> PAGE_SHIFT; - struct page *page = buf->page; - struct folio *folio; - unsigned int nofs_flag; + struct folio *folio = NULL; - if (!page || page->index != index) { + if (buf->page) { + folio = page_folio(buf->page); + if (folio_file_page(folio, index) != buf->page) + erofs_unmap_metabuf(buf); + } + if (!folio || !folio_contains(folio, index)) { erofs_put_metabuf(buf); - - nofs_flag = memalloc_nofs_save(); - folio = read_cache_folio(buf->mapping, index, NULL, NULL); - memalloc_nofs_restore(nofs_flag); + folio = read_mapping_folio(buf->mapping, index, NULL); if (IS_ERR(folio)) return folio; - - /* should already be PageUptodate, no need to lock page */ - page = folio_file_page(folio, index); - buf->page = page; } + buf->page = folio_file_page(folio, index); + if (buf->kmap_type == EROFS_NO_KMAP) { if (type == EROFS_KMAP) - buf->base = kmap_local_page(page); + buf->base = kmap_local_page(buf->page); buf->kmap_type = type; } else if (buf->kmap_type != type) { DBG_BUGON(1); diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 06a722b85a456615048be4d401097b8b796b729d..40666815046f2b7a6f8f5b7e6afd9e089ed73d3c 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -188,7 +188,7 @@ again: !rq->partial_decoding); buf.in_size = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in); rq->inputsize -= buf.in_size; - buf.in = dctx.kin + rq->pageofs_in, + buf.in = dctx.kin + rq->pageofs_in; dctx.bounce = strm->bounce; do { dctx.avail_out = buf.out_size - buf.out_pos; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2193a6710c8f666705a0ac72849fb62768dbca4a..c3b90abdee37af537514449a6b25baae314e04eb 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -8,19 +8,15 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, void *dentry_blk, struct erofs_dirent *de, - unsigned int nameoff, unsigned int maxsize) + unsigned int nameoff0, unsigned int maxsize) { - const struct erofs_dirent *end = dentry_blk + nameoff; + const struct erofs_dirent *end = dentry_blk + nameoff0; while (de < end) { - const char *de_name; + unsigned char d_type = fs_ftype_to_dtype(de->file_type); + unsigned int nameoff = le16_to_cpu(de->nameoff); + const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - unsigned char d_type; - - d_type = fs_ftype_to_dtype(de->file_type); - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; /* the last dirent in the block? */ if (de + 1 >= end) @@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; unsigned long bsz = sb->s_blocksize; - const size_t dirsize = i_size_read(dir); - unsigned int i = erofs_blknr(sb, ctx->pos); unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; buf.mapping = dir->i_mapping; - while (ctx->pos < dirsize) { + while (ctx->pos < dir->i_size) { + erofs_off_t dbstart = ctx->pos - ofs; struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP); + de = erofs_bread(&buf, dbstart, EROFS_KMAP); if (IS_ERR(de)) { erofs_err(sb, "fail to readdir of logical block %u of nid %llu", - i, EROFS_I(dir)->nid); + erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; } @@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } - maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); - + maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = erofs_pos(sb, i) + ofs; - if (ofs >= nameoff) - goto skip_this; + ctx->pos = dbstart + ofs; } err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); if (err) break; -skip_this: - ctx->pos = erofs_pos(sb, i) + maxsize; - ++i; + ctx->pos = dbstart + maxsize; ofs = 0; } erofs_put_metabuf(&buf); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 5f6439a63af798512f325beb4d0af112c2b252eb..419432be3223b72bb4cf7d3da550697c06ae9306 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } + mapping_set_large_folios(inode->i_mapping); if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, erofs_info, inode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; -#endif +#else err = -EOPNOTSUPP; - goto out_unlock; - } - inode->i_mapping->a_ops = &erofs_raw_access_aops; - mapping_set_large_folios(inode->i_mapping); +#endif + } else { + inode->i_mapping->a_ops = &erofs_raw_access_aops; #ifdef CONFIG_EROFS_FS_ONDEMAND - if (erofs_is_fscache_mode(inode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; #endif - + } out_unlock: erofs_put_metabuf(&buf); return err; @@ -334,14 +332,29 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, unsigned int query_flags) { struct inode *const inode = d_inode(path->dentry); + bool compressed = + erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout); - if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) + if (compressed) stat->attributes |= STATX_ATTR_COMPRESSED; - stat->attributes |= STATX_ATTR_IMMUTABLE; stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE); + /* + * Return the DIO alignment restrictions if requested. + * + * In EROFS, STATX_DIOALIGN is not supported in ondemand mode and + * compressed files, so in these cases we report no DIO support. + */ + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { + stat->result_mask |= STATX_DIOALIGN; + if (!erofs_is_fscache_mode(inode->i_sb) && !compressed) { + stat->dio_mem_align = + bdev_logical_block_size(inode->i_sb->s_bdev); + stat->dio_offset_align = stat->dio_mem_align; + } + } generic_fillattr(idmap, request_mask, inode, stat); return 0; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 736607675396e8c340bfcde2ae4432167870a3b3..45dc15ebd870ddf2a753fe10b4242d4a3ec5e69c 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -220,7 +220,7 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits) +#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 35268263aaed9f336336138fee80069da828a504..6cb5c8916174be4ead0500d421da82df985cf5f6 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode) kmem_cache_free(erofs_inode_cachep, vi); } -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *dsb) -{ - const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - - EROFS_SB(sb)->feature_incompat = feature; - - /* check if current kernel meets all mandatory requirements */ - if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", - feature & ~EROFS_ALL_FEATURE_INCOMPAT); - return false; - } - return true; -} - /* read variable-sized metadata, offset will be aligned by 4-byte */ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) @@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; void *data; @@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb) return PTR_ERR(data); } - sbi = EROFS_SB(sb); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); - ret = -EINVAL; if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { erofs_err(sb, "cannot find valid erofs superblock"); @@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb) } ret = -EINVAL; - if (!check_layout_compatibility(sb, dsb)) + sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat); + if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) { + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", + sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT); goto out; + } sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) { @@ -576,6 +562,21 @@ static const struct export_operations erofs_export_ops = { .get_parent = erofs_get_parent, }; +static void erofs_set_sysfs_name(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + if (erofs_is_fscache_mode(sb)) { + if (sbi->domain_id) + super_set_sysfs_name_generic(sb, "%s,%s",sbi->domain_id, + sbi->fsid); + else + super_set_sysfs_name_generic(sb, "%s", sbi->fsid); + return; + } + super_set_sysfs_name_id(sb); +} + static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; @@ -643,6 +644,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_flags |= SB_POSIXACL; else sb->s_flags &= ~SB_POSIXACL; + erofs_set_sysfs_name(sb); #ifdef CONFIG_EROFS_FS_ZIP xa_init(&sbi->managed_pslots); diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index b80f612867c2b7711a7e9943ba0f354c16c32bc5..37afe202484091a90a90634e525954eb601b59e1 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -38,11 +38,13 @@ void *z_erofs_get_gbuf(unsigned int requiredpages) { struct z_erofs_gbuf *gbuf; + migrate_disable(); gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; spin_lock(&gbuf->lock); /* check if the buffer is too small */ if (requiredpages > gbuf->nrpages) { spin_unlock(&gbuf->lock); + migrate_enable(); /* (for sparse checker) pretend gbuf->lock is still taken */ __acquire(gbuf->lock); return NULL; @@ -57,6 +59,7 @@ void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock) gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; DBG_BUGON(gbuf->ptr != ptr); spin_unlock(&gbuf->lock); + migrate_enable(); } int z_erofs_gbuf_growsize(unsigned int nrpages) @@ -108,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) - if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) + if (tmp_pages[j] && (j >= gbuf->nrpages || + tmp_pages[j] != gbuf->pages[j])) __free_page(tmp_pages[j]); kfree(tmp_pages); } diff --git a/fs/exec.c b/fs/exec.c index a126e3d1cacb01174fec72d4211bff2a4cd9f52a..50e76cc633c4baec0aa90240436c46f63c9a0e32 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1692,6 +1692,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) unsigned int mode; vfsuid_t vfsuid; vfsgid_t vfsgid; + int err; if (!mnt_may_suid(file->f_path.mnt)) return; @@ -1708,12 +1709,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) /* Be careful if suid/sgid is set */ inode_lock(inode); - /* reload atomically mode/uid/gid now that lock held */ + /* Atomically reload and check mode/uid/gid now that lock held. */ mode = inode->i_mode; vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); + err = inode_permission(idmap, inode, MAY_EXEC); inode_unlock(inode); + /* Did the exec bit vanish out from under us? Give up. */ + if (err) + return; + /* We ignore suid/sgid if there are no mappings for them in the ns */ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) || !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid)) diff --git a/fs/file.c b/fs/file.c index a3b72aa64f1164380fdaa6524081a4ac956831da..655338effe9c725574cf19e737a9384e7a460356 100644 --- a/fs/file.c +++ b/fs/file.c @@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu) #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) +#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ -static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, - unsigned int count) +static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, + unsigned int copy_words) { - unsigned int cpy, set; - - cpy = count / BITS_PER_BYTE; - set = (nfdt->max_fds - count) / BITS_PER_BYTE; - memcpy(nfdt->open_fds, ofdt->open_fds, cpy); - memset((char *)nfdt->open_fds + cpy, 0, set); - memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); - memset((char *)nfdt->close_on_exec + cpy, 0, set); - - cpy = BITBIT_SIZE(count); - set = BITBIT_SIZE(nfdt->max_fds) - cpy; - memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); - memset((char *)nfdt->full_fds_bits + cpy, 0, set); + unsigned int nwords = fdt_words(nfdt); + + bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits, + copy_words, nwords); } /* @@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); + copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt)); } /* @@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int open_files = sane_fdtable_size(old_fdt, max_fds); } - copy_fd_bitmaps(new_fdt, old_fdt, open_files); + copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG); old_fds = old_fdt->fd; new_fds = new_fdt->fd; @@ -1248,6 +1244,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de124b3b469f5487beebbaf7630eb3..7146038b2fe7d69d2dbd57d80a100e5822a5d6e9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1618,9 +1618,11 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_SIZE || file_size == end)) + if (!PageUptodate(page) && !err && offset == 0 && + (this_num == PAGE_SIZE || file_size == end)) { + zero_user_segment(page, this_num, PAGE_SIZE); SetPageUptodate(page); + } unlock_page(page); put_page(page); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 777c1f77ff58446ac0bbaeb427ff4f9c567ba682..22df574ca99e76937655331176443119f12a8c43 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "hostfs.h" #include @@ -929,7 +930,6 @@ static const struct inode_operations hostfs_link_iops = { static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct hostfs_fs_info *fsi = sb->s_fs_info; - const char *host_root = fc->source; struct inode *root_inode; int err; @@ -943,15 +943,6 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - /* NULL is printed as '(null)' by printf(): avoid that. */ - if (fc->source == NULL) - host_root = ""; - - fsi->host_root_path = - kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); - if (fsi->host_root_path == NULL) - return -ENOMEM; - root_inode = hostfs_iget(sb, fsi->host_root_path); if (IS_ERR(root_inode)) return PTR_ERR(root_inode); @@ -977,6 +968,58 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) return 0; } +enum hostfs_parma { + Opt_hostfs, +}; + +static const struct fs_parameter_spec hostfs_param_specs[] = { + fsparam_string_empty("hostfs", Opt_hostfs), + {} +}; + +static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct hostfs_fs_info *fsi = fc->s_fs_info; + struct fs_parse_result result; + char *host_root; + int opt; + + opt = fs_parse(fc, hostfs_param_specs, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_hostfs: + host_root = param->string; + if (!*host_root) + host_root = ""; + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) + return -ENOMEM; + break; + } + + return 0; +} + +static int hostfs_parse_monolithic(struct fs_context *fc, void *data) +{ + struct hostfs_fs_info *fsi = fc->s_fs_info; + char *host_root = (char *)data; + + /* NULL is printed as '(null)' by printf(): avoid that. */ + if (host_root == NULL) + host_root = ""; + + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) + return -ENOMEM; + + return 0; +} + static int hostfs_fc_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, hostfs_fill_super); @@ -994,6 +1037,8 @@ static void hostfs_fc_free(struct fs_context *fc) } static const struct fs_context_operations hostfs_context_ops = { + .parse_monolithic = hostfs_parse_monolithic, + .parse_param = hostfs_parse_param, .get_tree = hostfs_fc_get_tree, .free = hostfs_fc_free, }; diff --git a/fs/inode.c b/fs/inode.c index 86670941884b48737acdfb46858d4611b0592cc0..10c4619faeef8cb81d84a91ec2d982d5a1a51a5c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -488,6 +488,39 @@ static void inode_lru_list_del(struct inode *inode) this_cpu_dec(nr_unused); } +static void inode_pin_lru_isolating(struct inode *inode) +{ + lockdep_assert_held(&inode->i_lock); + WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); + inode->i_state |= I_LRU_ISOLATING; +} + +static void inode_unpin_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); + inode->i_state &= ~I_LRU_ISOLATING; + smp_mb(); + wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); +} + +static void inode_wait_for_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_state & I_LRU_ISOLATING) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); + __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode->i_lock); + WARN_ON(inode->i_state & I_LRU_ISOLATING); + } + spin_unlock(&inode->i_lock); +} + /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add @@ -657,6 +690,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_wait_for_lru_isolating(inode); + /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since @@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, * be under pressure before the cache inside the highmem zone. */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { - __iget(inode); + inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); if (remove_inode_buffers(inode)) { @@ -867,7 +902,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, __count_vm_events(PGINODESTEAL, reap); mm_account_reclaimed_pages(reap); } - iput(inode); + inode_unpin_lru_isolating(inode); spin_lock(lru_lock); return LRU_RETRY; } diff --git a/fs/libfs.c b/fs/libfs.c index 8aa34870449fe18ce5d44ec525dd8e666de95a23..02602d00939e9688250770ae5195cab2cb5b2663 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx) mtree_destroy(&octx->mt); } +static int offset_dir_open(struct inode *inode, struct file *file) +{ + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + + file->private_data = (void *)ctx->next_offset; + return 0; +} + /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated @@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx) */ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { + struct inode *inode = file->f_inode; + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + switch (whence) { case SEEK_CUR: offset += file->f_pos; @@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) } /* In this case, ->private_data is protected by f_pos_lock */ - file->private_data = NULL; + if (!offset) + file->private_data = (void *)ctx->next_offset; return vfs_setpos(file, offset, LONG_MAX); } @@ -507,7 +519,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; @@ -515,17 +527,21 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) - return ERR_PTR(-ENOENT); + return; + + if (dentry2offset(dentry) >= last_index) { + dput(dentry); + return; + } if (!offset_dir_emit(ctx, dentry)) { dput(dentry); - break; + return; } ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } - return NULL; } /** @@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; + long last_index = (long)file->private_data; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; - /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == DIR_OFFSET_MIN) - file->private_data = NULL; - else if (file->private_data == ERR_PTR(-ENOENT)) - return 0; - file->private_data = offset_iterate_dir(d_inode(dir), ctx); + offset_iterate_dir(d_inode(dir), ctx, last_index); return 0; } const struct file_operations simple_offset_dir_operations = { + .open = offset_dir_open, .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, diff --git a/fs/locks.c b/fs/locks.c index 9afb16e0683ffd0ff0430849686dc6ea8b8640f9..e45cad40f8b6bd0ffe007c706eac989082307fae 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2984,7 +2984,7 @@ static int __init filelock_init(void) filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); - filelease_cache = kmem_cache_create("file_lock_cache", + filelease_cache = kmem_cache_create("file_lease_cache", sizeof(struct file_lease), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) { diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index 1b78e8b65ebc142d0ff226ec1f65a37d6b3349c6..7701c037c3283f27e830a49483ce377c8a802538 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -24,7 +24,7 @@ config NETFS_STATS config NETFS_DEBUG bool "Enable dynamic debugging netfslib and FS-Cache" - depends on NETFS + depends on NETFS_SUPPORT help This permits debugging to be dynamically enabled in the local caching management module. If this is set, the debugging output may be diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a688d4c75d9926864ae80d2d26a7e1ed6a34ac3d..27c750d39476260bc971591a4735748ba2c019e6 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -9,6 +9,97 @@ #include #include "internal.h" +/* + * [DEPRECATED] Unlock the folios in a read operation for when the filesystem + * is using PG_private_2 and direct writing to the cache from here rather than + * marking the page for writeback. + * + * Note that we don't touch folio->private in this code. + */ +static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq, + size_t *account) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t start_page = rreq->start / PAGE_SIZE; + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + bool subreq_failed = false; + + XA_STATE(xas, &rreq->mapping->i_pages, start_page); + + /* Walk through the pagecache and the I/O request lists simultaneously. + * We may have a mixture of cached and uncached sections and we only + * really want to write out the uncached sections. This is slightly + * complicated by the possibility that we might have huge pages with a + * mixture inside. + */ + subreq = list_first_entry(&rreq->subrequests, + struct netfs_io_subrequest, rreq_link); + subreq_failed = (subreq->error < 0); + + trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2); + + rcu_read_lock(); + xas_for_each(&xas, folio, last_page) { + loff_t pg_end; + bool pg_failed = false; + bool folio_started = false; + + if (xas_retry(&xas, folio)) + continue; + + pg_end = folio_pos(folio) + folio_size(folio) - 1; + + for (;;) { + loff_t sreq_end; + + if (!subreq) { + pg_failed = true; + break; + } + + if (!folio_started && + test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) && + fscache_operation_valid(&rreq->cache_resources)) { + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); + folio_start_private_2(folio); + folio_started = true; + } + + pg_failed |= subreq_failed; + sreq_end = subreq->start + subreq->len - 1; + if (pg_end < sreq_end) + break; + + *account += subreq->transferred; + if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + subreq = list_next_entry(subreq, rreq_link); + subreq_failed = (subreq->error < 0); + } else { + subreq = NULL; + subreq_failed = false; + } + + if (pg_end == sreq_end) + break; + } + + if (!pg_failed) { + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { + if (folio->index == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) + _debug("no unlock"); + else + folio_unlock(folio); + } + } + rcu_read_unlock(); +} + /* * Unlock the folios in a read operation. We need to set PG_writeback on any * folios we're going to write back before we unlock them. @@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } } + /* Handle deprecated PG_private_2 case. */ + if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { + netfs_rreq_unlock_folios_pgpriv2(rreq, &account); + goto out; + } + /* Walk through the pagecache and the I/O request lists simultaneously. * We may have a mixture of cached and uncached sections and we only * really want to write out the uncached sections. This is slightly @@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) loff_t pg_end; bool pg_failed = false; bool wback_to_cache = false; - bool folio_started = false; if (xas_retry(&xas, folio)) continue; @@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) pg_failed = true; break; } - if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { - if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, - &subreq->flags)) { - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); - folio_start_private_2(folio); - folio_started = true; - } - } else { - wback_to_cache |= - test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); - } + + wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); pg_failed |= subreq_failed; sreq_end = subreq->start + subreq->len - 1; if (pg_end < sreq_end) @@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } rcu_read_unlock(); +out: task_io_account_read(account); if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -395,7 +483,7 @@ zero_out: } /** - * netfs_write_begin - Helper to prepare for writing + * netfs_write_begin - Helper to prepare for writing [DEPRECATED] * @ctx: The netfs context * @file: The file to read from * @mapping: The mapping to read from @@ -426,6 +514,9 @@ zero_out: * inode before calling this. * * This is usable whether or not caching is enabled. + * + * Note that this should be considered deprecated and netfs_perform_write() + * used instead. */ int netfs_write_begin(struct netfs_inode *ctx, struct file *file, struct address_space *mapping, @@ -466,7 +557,7 @@ retry: if (!netfs_is_cache_enabled(ctx) && netfs_skip_folio_read(folio, pos, len, false)) { netfs_stat(&netfs_n_rh_write_zskip); - goto have_folio; + goto have_folio_no_wait; } rreq = netfs_alloc_request(mapping, file, @@ -507,6 +598,10 @@ retry: netfs_put_request(rreq, false, netfs_rreq_trace_put_return); have_folio: + ret = folio_wait_private_2_killable(folio); + if (ret < 0) + goto error; +have_folio_no_wait: *_folio = folio; _leave(" = 0"); return 0; diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 4726c315453c3a36007df96826414ec2757467ba..ca53c5d1622edecf633aea8f174b96e87351f370 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -184,7 +184,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; - size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; + size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0b44ef5a69733b690b3c9e18beb21..d4d4b3a8b10603794e57333adfbf66562b491639 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -741,6 +741,10 @@ again_locked: spin_lock(&cookie->lock); } if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) { + if (atomic_read(&cookie->n_accesses) != 0) + /* still being accessed: postpone it */ + break; + __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_LRU_DISCARDING); wake = true; diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b9836889e87257cd058eec8926201738b2..5367caf3fa28630cedf8364ac63ec3d1db7699ad 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -98,6 +98,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async) netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete); } +/* + * [DEPRECATED] Deal with the completion of writing the data to the cache. We + * have to clear the PG_fscache bits on the folios involved and release the + * caller's ref. + * + * May be called in softirq mode and we inherit a ref from the caller. + */ +static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, + bool was_async) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t unlocked = 0; + bool have_unlocked = false; + + rcu_read_lock(); + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); + + xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + if (xas_retry(&xas, folio)) + continue; + + /* We might have multiple writes from the same huge + * folio, but we mustn't unlock a folio more than once. + */ + if (have_unlocked && folio->index <= unlocked) + continue; + unlocked = folio_next_index(folio) - 1; + trace_netfs_folio(folio, netfs_folio_trace_end_copy); + folio_end_private_2(folio); + have_unlocked = true; + } + } + + rcu_read_unlock(); + netfs_rreq_completed(rreq, was_async); +} + +static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, + bool was_async) /* [DEPRECATED] */ +{ + struct netfs_io_subrequest *subreq = priv; + struct netfs_io_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) { + netfs_stat(&netfs_n_rh_write_failed); + trace_netfs_failure(rreq, subreq, transferred_or_error, + netfs_fail_copy_to_cache); + } else { + netfs_stat(&netfs_n_rh_write_done); + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); + + /* If we decrement nr_copy_ops to 0, the ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, was_async); + + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); +} + +/* + * [DEPRECATED] Perform any outstanding writes to the cache. We inherit a ref + * from the caller. + */ +static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct netfs_io_subrequest *subreq, *next, *p; + struct iov_iter iter; + int ret; + + trace_netfs_rreq(rreq, netfs_rreq_trace_copy); + + /* We don't want terminating writes trying to wake us up whilst we're + * still going through the list. + */ + atomic_inc(&rreq->nr_copy_ops); + + list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { + list_del_init(&subreq->rreq_link); + netfs_put_subrequest(subreq, false, + netfs_sreq_trace_put_no_copy); + } + } + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + /* Amalgamate adjacent writes */ + while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + next = list_next_entry(subreq, rreq_link); + if (next->start != subreq->start + subreq->len) + break; + subreq->len += next->len; + list_del_init(&next->rreq_link); + netfs_put_subrequest(next, false, + netfs_sreq_trace_put_merged); + } + + ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, + subreq->len, rreq->i_size, true); + if (ret < 0) { + trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); + trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); + continue; + } + + iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, + subreq->start, subreq->len); + + atomic_inc(&rreq->nr_copy_ops); + netfs_stat(&netfs_n_rh_write); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); + trace_netfs_sreq(subreq, netfs_sreq_trace_write); + cres->ops->write(cres, subreq->start, &iter, + netfs_rreq_copy_terminated, subreq); + } + + /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, false); +} + +static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */ +{ + struct netfs_io_request *rreq = + container_of(work, struct netfs_io_request, work); + + netfs_rreq_do_write_to_cache(rreq); +} + +static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */ +{ + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); +} + /* * Handle a short read. */ @@ -275,6 +415,10 @@ again: clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); + if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) && + test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) + return netfs_rreq_write_to_cache(rreq); + netfs_rreq_completed(rreq, was_async); } @@ -386,7 +530,8 @@ incomplete: if (transferred_or_error == 0) { if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { - subreq->error = -ENODATA; + if (rreq->origin != NETFS_DIO_READ) + subreq->error = -ENODATA; goto failed; } } else { @@ -457,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, } if (subreq->len > ictx->zero_point - subreq->start) subreq->len = ictx->zero_point - subreq->start; + + /* We limit buffered reads to the EOF, but let the + * server deal with larger-than-EOF DIO/unbuffered + * reads. + */ + if (subreq->len > rreq->i_size - subreq->start) + subreq->len = rreq->i_size - subreq->start; } - if (subreq->len > rreq->i_size - subreq->start) - subreq->len = rreq->i_size - subreq->start; if (rreq->rsize && subreq->len > rreq->rsize) subreq->len = rreq->rsize; @@ -595,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) do { _debug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); - if (rreq->origin == NETFS_DIO_READ && - rreq->start + rreq->submitted >= rreq->i_size) - break; if (!netfs_rreq_submit_slice(rreq, &io_iter)) break; + if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags)) + break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) break; diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index f4a642727479252de0212f80fe32ef23468e0ebf..0294df70c3ff4563e023be18421cb85a8cca4370 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, struct netfs_io_request *rreq; mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; struct kmem_cache *cache = mempool->pool_data; - bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || - origin == NETFS_DIO_READ || - origin == NETFS_DIO_WRITE); - bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); int ret; for (;;) { @@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, refcount_set(&rreq->ref, 1); __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - if (cached) { - __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); - if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) - /* Filesystem uses deprecated PG_private_2 marking. */ - __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); - } if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 9258d30cffe3cf4275b20cd0944a817c238292b9..3f7e37e50c7d02456af24ae7cbdc9429d6875580 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, { struct netfs_io_request *wreq; struct netfs_inode *ictx; + bool is_buffered = (origin == NETFS_WRITEBACK || + origin == NETFS_WRITETHROUGH); wreq = netfs_alloc_request(mapping, file, start, 0, origin); if (IS_ERR(wreq)) @@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, _enter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) + if (is_buffered && netfs_is_cache_enabled(ictx)) fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); wreq->contiguity = wreq->start; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 7202ce84d0eb032784e241fa8a82e79486948d49..7a558dea75c4092663a7a2230b4430c7a89efea0 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi { rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); return 0; } @@ -361,7 +363,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) return; sreq = netfs->sreq; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); if (hdr->error) diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index fbed0027996f8840ddbe7a16ebe457fa684c525c..e8adae1bc260a31fdab9c3ed91b7e1373609759e 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9e0ea6fc2aa326c5413d792743a330ee5eec68f9..34eb2c2cbcde34377a347efe7a5e2dbffe552053 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) continue; } - ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, - SVC_SOCK_ANONYMOUS, + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 0131d83b912de1fedcaa0e1df49f6f404ee5a6ab..c034080c334b98d623540a3616ecc92fc40ecf2b 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -51,12 +51,21 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). + */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } memset(bh->b_data, 0, i_blocksize(inode)); bh->b_bdev = inode->i_sb->s_bdev; @@ -67,6 +76,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) folio_unlock(bh->b_folio); folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, @@ -217,8 +232,8 @@ retry: } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index a139970e48041dbf4f7ee20e3e411e57f22505cc..862bdf23120e8abba21813ab9f0e5ca393399cab 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); - if (!bh) - return -ENOMEM; + if (IS_ERR(bh)) + return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c71ae5c043060ebf5dd7f6d9e5f63e6e7bcf7841..4a20e92474b234dec8ace6f7e806fb3be62802c3 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) static void cifs_security_flags_handle_must_flags(unsigned int *flags) { - unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; + unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL); if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) *flags = CIFSSEC_MUST_KRB5; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index c92937bed13315cd7abdadd54863ec45a79b4b82..2c4b357d85e22cfc60476a0be6e6bf7b24b826f3 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1894,12 +1894,12 @@ init_cifs(void) WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); if (!serverclose_wq) { rc = -ENOMEM; - goto out_destroy_serverclose_wq; + goto out_destroy_deferredclose_wq; } rc = cifs_init_inodecache(); if (rc) - goto out_destroy_deferredclose_wq; + goto out_destroy_serverclose_wq; rc = cifs_init_netfs(); if (rc) @@ -1967,6 +1967,8 @@ out_destroy_netfs: cifs_destroy_netfs(); out_destroy_inodecache: cifs_destroy_inodecache(); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_destroy_deferredclose_wq: destroy_workqueue(deferredclose_wq); out_destroy_cifsoplockd_wq: @@ -1977,8 +1979,6 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); -out_destroy_serverclose_wq: - destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 62d5fee3e5ebbdefc0de78a5bd20f5d6d0dd51f9..ca2bd204bcc591904c1b32da6c1c096d2384f655 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -147,6 +147,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 49 -#define CIFS_VERSION "2.49" +#define SMB3_PRODUCT_BUILD 50 +#define CIFS_VERSION "2.50" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8e86fec7dcd2ba6f54167600f651439540942045..5c9b3e6cd95f201e16a4d08679aee00686fa19c8 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -345,7 +345,7 @@ struct smb_version_operations { /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); - /* close tree connecion */ + /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, @@ -816,7 +816,7 @@ struct TCP_Server_Info { * Protected by @refpath_lock and @srv_lock. The @refpath_lock is * mostly used for not requiring a copy of @leaf_fullpath when getting * cached or new DFS referrals (which might also sleep during I/O). - * While @srv_lock is held for making string and NULL comparions against + * While @srv_lock is held for making string and NULL comparisons against * both fields as in mount(2) and cache refresh. * * format: \\HOST\SHARE[\OPTIONAL PATH] @@ -1471,29 +1471,6 @@ struct cifs_io_parms { struct TCP_Server_Info *server; }; -struct cifs_aio_ctx { - struct kref refcount; - struct list_head list; - struct mutex aio_mutex; - struct completion done; - struct iov_iter iter; - struct kiocb *iocb; - struct cifsFileInfo *cfile; - struct bio_vec *bv; - loff_t pos; - unsigned int nr_pinned_pages; - ssize_t rc; - unsigned int len; - unsigned int total_len; - unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */ - bool should_dirty; - /* - * Indicates if this aio_ctx is for direct_io, - * If yes, iter is a copy of the user passed iov_iter - */ - bool direct_io; -}; - struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; @@ -1904,7 +1881,7 @@ static inline bool is_replayable_error(int error) #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 -#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ +#define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 @@ -1914,11 +1891,11 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else -#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ -#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ +#define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) @@ -2010,7 +1987,6 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled - * cifs_aio_ctx->aio_mutex cifs_aio_ctx cifs_aio_ctx_alloc ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c15bb5ee7eb7d6787f5138499591071169008bda..497bf3c447bcb59486feed877560bd920d91a70e 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -619,8 +619,6 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); -struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); -void cifs_aio_ctx_release(struct kref *refcount); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 7a16e12f5da879bbbb8ace98a4ec4f30aafec33e..d2307162a2de15ec01462d49b8ce3191a0be9f66 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2614,6 +2614,13 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions\n"); rc = -EOPNOTSUPP; goto out_fail; + } else if (ses->server->vals->protocol_id == SMB10_PROT_ID) + if (cap_unix(ses)) + cifs_dbg(FYI, "Unix Extensions requested on SMB1 mount\n"); + else { + cifs_dbg(VFS, "SMB1 Unix Extensions not supported by server\n"); + rc = -EOPNOTSUPP; + goto out_fail; } else { cifs_dbg(VFS, "Check vers= mount option. SMB3.11 disabled but required for POSIX extensions\n"); @@ -3686,6 +3693,7 @@ error: } #endif +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* * Issue a TREE_CONNECT request. */ @@ -3807,11 +3815,25 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, else tcon->Flags = 0; cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags); - } + /* + * reset_cifs_unix_caps calls QFSInfo which requires + * need_reconnect to be false, but we would not need to call + * reset_caps if this were not a reconnect case so must check + * need_reconnect flag here. The caller will also clear + * need_reconnect when tcon was successful but needed to be + * cleared earlier in the case of unix extensions reconnect + */ + if (tcon->need_reconnect && tcon->unix_ext) { + cifs_dbg(FYI, "resetting caps for %s\n", tcon->tree_name); + tcon->need_reconnect = false; + reset_cifs_unix_caps(xid, tcon, NULL, NULL); + } + } cifs_buf_release(smb_buffer); return rc; } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ static void delayed_free(struct rcu_head *p) { diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b2405dd4d4d4da6e515c554b9c3e9649ec59c4cb..1fc66bcf49eb45dd35a56b1dc408d01c92afb6bc 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -217,7 +217,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) goto out; } - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); rc = rdata->server->ops->async_readv(rdata); out: @@ -315,7 +316,7 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } - if (rdata->credits.value != 0) + if (rdata->credits.value != 0) { trace_smb3_rw_credits(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->credits.value, @@ -323,8 +324,12 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) rdata->server ? rdata->server->in_flight : 0, -rdata->credits.value, cifs_trace_rw_credits_free_subreq); + if (rdata->server) + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); + else + rdata->credits.value = 0; + } - add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); } @@ -2749,6 +2754,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2765,12 +2771,16 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, - NULL, CIFS_WRITE_OP)) - rc = netfs_buffered_write_iter_locked(iocb, from, NULL); - else + NULL, CIFS_WRITE_OP))) { rc = -EACCES; + goto out; + } + + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); + out: up_read(&cinode->lock_sem); netfs_end_io_write(inode); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 4a8aa1de95223dfc487e577e476be18d677b6477..dd0afa23734c8d7d1aa14a0a6fee7ba2cf9c10c5 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1042,13 +1042,26 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse.tag = tag)) { - case 0: /* SMB1 symlink */ + data->reparse.tag = tag; + if (!data->reparse.tag) { if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, &data->symlink_target); } + if (rc == -EOPNOTSUPP) + data->reparse.tag = IO_REPARSE_TAG_INTERNAL; + } + + switch (data->reparse.tag) { + case 0: /* SMB1 symlink */ + break; + case IO_REPARSE_TAG_INTERNAL: + rc = 0; + if (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY) { + cifs_create_junction_fattr(fattr, sb); + goto out; + } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 855ac5a62edfaa50215cfed46e361dcb79f0c8fc..44dbaf9929a4e39707630463c6270bb43d1781fd 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -170,7 +170,10 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, static int cifs_shutdown(struct super_block *sb, unsigned long arg) { struct cifs_sb_info *sbi = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *tcon; __u32 flags; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -178,14 +181,21 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; - if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) - return -EINVAL; + tlink = cifs_sb_tlink(sbi); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + + trace_smb3_shutdown_enter(flags, tcon->tid); + if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) { + rc = -EINVAL; + goto shutdown_out_err; + } if (cifs_forced_shutdown(sbi)) - return 0; + goto shutdown_good; cifs_dbg(VFS, "shut down requested (%d)", flags); -/* trace_cifs_shutdown(sb, flags);*/ /* * see: @@ -201,7 +211,8 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_DEFAULT: cifs_dbg(FYI, "shutdown with default flag not supported\n"); - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; /* * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not * data) but metadata writes are not cached on the client, so can treat @@ -210,11 +221,18 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; - return 0; + goto shutdown_good; default: - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; } + +shutdown_good: + trace_smb3_shutdown_done(flags, tcon->tid); return 0; +shutdown_out_err: + trace_smb3_shutdown_err(rc, flags, tcon->tid); + return rc; } static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 07c468ddb88a89d65f8a48433a055759deb3da26..c6f11e6f9eb995b6e809ee18c320d1ec2854f9f8 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) * on simple responses (wct, bcc both zero) * in particular have seen this on * ulogoffX and FindClose. This leaves - * one byte of bcc potentially unitialized + * one byte of bcc potentially uninitialized */ /* zero rest of bcc */ tmp[sizeof(struct smb_hdr)+1] = 0; @@ -995,60 +995,6 @@ parse_DFS_referrals_exit: return rc; } -struct cifs_aio_ctx * -cifs_aio_ctx_alloc(void) -{ - struct cifs_aio_ctx *ctx; - - /* - * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io - * to false so that we know when we have to unreference pages within - * cifs_aio_ctx_release() - */ - ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - INIT_LIST_HEAD(&ctx->list); - mutex_init(&ctx->aio_mutex); - init_completion(&ctx->done); - kref_init(&ctx->refcount); - return ctx; -} - -void -cifs_aio_ctx_release(struct kref *refcount) -{ - struct cifs_aio_ctx *ctx = container_of(refcount, - struct cifs_aio_ctx, refcount); - - cifsFileInfo_put(ctx->cfile); - - /* - * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly - * which means that iov_iter_extract_pages() was a success and thus - * that we may have references or pins on pages that we need to - * release. - */ - if (ctx->bv) { - if (ctx->should_dirty || ctx->bv_need_unpin) { - unsigned int i; - - for (i = 0; i < ctx->nr_pinned_pages; i++) { - struct page *page = ctx->bv[i].bv_page; - - if (ctx->should_dirty) - set_page_dirty(page); - if (ctx->bv_need_unpin) - unpin_user_page(page); - } - } - kvfree(ctx->bv); - } - - kfree(ctx); -} - /** * cifs_alloc_hash - allocate hash and hash context together * @name: The name of the crypto hash algo @@ -1288,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, const char *full_path, bool *islink) { + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_ses *ses = tcon->ses; size_t len; char *path; @@ -1304,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, !is_tcon_dfs(tcon)) return 0; - spin_lock(&tcon->tc_lock); - if (!tcon->origin_fullpath) { - spin_unlock(&tcon->tc_lock); + spin_lock(&server->srv_lock); + if (!server->leaf_fullpath) { + spin_unlock(&server->srv_lock); return 0; } - spin_unlock(&tcon->tc_lock); + spin_unlock(&server->srv_lock); /* * Slow path - tcon is DFS and @full_path has prefix path, so attempt diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index a0ffbda90733170bf6c80267f29c90e9fb49d62f..689d8a506d4593caa5952c390c4b0efdbdab06cf 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -505,6 +505,10 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, } switch (tag) { + case IO_REPARSE_TAG_INTERNAL: + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + fallthrough; case IO_REPARSE_TAG_DFS: case IO_REPARSE_TAG_DFSR: case IO_REPARSE_TAG_MOUNT_POINT: diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 6b55d1df9e2f844f04b8469823ca5d370589c871..2c0644bc4e65a7e1acaed6f5415f917d5b8a62d5 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -12,6 +12,12 @@ #include "fs_context.h" #include "cifsglob.h" +/* + * Used only by cifs.ko to ignore reparse points from files when client or + * server doesn't support FSCTL_GET_REPARSE_POINT. + */ +#define IO_REPARSE_TAG_INTERNAL ((__u32)~0U) + static inline dev_t reparse_nfs_mkdev(struct reparse_posix_data *buf) { u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); @@ -78,10 +84,19 @@ static inline u32 reparse_mode_wsl_tag(mode_t mode) static inline bool reparse_inode_match(struct inode *inode, struct cifs_fattr *fattr) { + struct cifsInodeInfo *cinode = CIFS_I(inode); struct timespec64 ctime = inode_get_ctime(inode); - return (CIFS_I(inode)->cifsAttrs & ATTR_REPARSE) && - CIFS_I(inode)->reparse_tag == fattr->cf_cifstag && + /* + * Do not match reparse tags when client or server doesn't support + * FSCTL_GET_REPARSE_POINT. @fattr->cf_cifstag should contain correct + * reparse tag from query dir response but the client won't be able to + * read the reparse point data anyway. This spares us a revalidation. + */ + if (cinode->reparse_tag != IO_REPARSE_TAG_INTERNAL && + cinode->reparse_tag != fattr->cf_cifstag) + return false; + return (cinode->cifsAttrs & ATTR_REPARSE) && timespec64_equal(&ctime, &fattr->cf_ctime); } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5c02a12251c8486f55095777eedee868282d1a54..9f5bc41433c15646278d4dc19115c6f0798d5c14 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -930,6 +930,8 @@ int smb2_query_path_info(const unsigned int xid, switch (rc) { case 0: + rc = parse_create_response(data, cifs_sb, &out_iov[0]); + break; case -EOPNOTSUPP: /* * BB TODO: When support for special files added to Samba @@ -948,7 +950,8 @@ int smb2_query_path_info(const unsigned int xid, cmds[num_cmds++] = SMB2_OP_GET_REPARSE; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, - FILE_READ_ATTRIBUTES | FILE_READ_EA, + FILE_READ_ATTRIBUTES | + FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, create_options | OPEN_REPARSE_POINT, ACL_NO_MODE); cifs_get_readable_path(tcon, full_path, &cfile); @@ -1256,7 +1259,8 @@ int smb2_query_reparse_point(const unsigned int xid, cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); cifs_get_readable_path(tcon, full_path, &cfile); - oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES, + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7fe59235f09016bd19f13bddb94ff0603cebe8aa..322cabc69c6f1490f0d89bb884e61910efb8f39b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1812,6 +1812,10 @@ smb2_copychunk_range(const unsigned int xid, tcon = tlink_tcon(trgtfile->tlink); + trace_smb3_copychunk_enter(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); + while (len > 0) { pcchunk->SourceOffset = cpu_to_le64(src_off); pcchunk->TargetOffset = cpu_to_le64(dest_off); @@ -1863,6 +1867,9 @@ smb2_copychunk_range(const unsigned int xid, le32_to_cpu(retbuf->ChunksWritten), le32_to_cpu(retbuf->ChunkBytesWritten), bytes_written); + trace_smb3_copychunk_done(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); } else if (rc == -EINVAL) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) goto cchunk_out; @@ -2046,7 +2053,9 @@ smb2_duplicate_extents(const unsigned int xid, dup_ext_buf.ByteCount = cpu_to_le64(len); cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n", src_off, dest_off, len); - + trace_smb3_clone_enter(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); inode = d_inode(trgtfile->dentry); if (inode->i_size < dest_off + len) { rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); @@ -2075,6 +2084,15 @@ smb2_duplicate_extents(const unsigned int xid, cifs_dbg(FYI, "Non-zero response length in duplicate extents\n"); duplicate_extents_out: + if (rc) + trace_smb3_clone_err(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, + tcon->tid, tcon->ses->Suid, src_off, + dest_off, len, rc); + else + trace_smb3_clone_done(xid, srcfile->fid.volatile_fid, + trgtfile->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dest_off, len); return rc; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9fc5b11c0b6ced612f7a458f6075495dffa19b84..83facb54276a317b27333200dab66e26d16189fa 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon) if (tcon->seal && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) return 1; + if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) && + (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + return 1; return 0; } @@ -1562,8 +1565,14 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) cifs_small_buf_release(sess_data->iov[0].iov_base); if (rc == 0) sess_data->ses->expired_pwd = false; - else if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) + else if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) { + if (sess_data->ses->expired_pwd == false) + trace_smb3_key_expired(sess_data->server->hostname, + sess_data->ses->user_name, + sess_data->server->conn_id, + &sess_data->server->dstaddr, rc); sess_data->ses->expired_pwd = true; + } memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec)); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d74e829de51c225a94f8221f03712f9826c030d7..7bcc379014cadd4e20415b6a06914e3ad33744f8 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work) else response = get_empty_queue_buffer(info); if (!response) { - /* now switch to emtpy packet queue */ + /* now switch to empty packet queue */ if (use_receive_queue) { use_receive_queue = 0; continue; @@ -618,7 +618,7 @@ out: /* * Test if FRWR (Fast Registration Work Requests) is supported on the device - * This implementation requries FRWR on RDMA read/write + * This implementation requires FRWR on RDMA read/write * return value: true if it is supported */ static bool frwr_is_supported(struct ib_device_attr *attrs) @@ -2177,7 +2177,7 @@ cleanup_entries: * MR available in the list. It may access the list while the * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock * as they never modify the same places. However, there may be several CPUs - * issueing I/O trying to get MR at the same time, mr_list_lock is used to + * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. */ static struct smbd_mr *get_mr(struct smbd_connection *info) @@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* * There is no need for waiting for complemtion on ib_post_send * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution - * on the next ib_post_send when we actaully send I/O to remote peer + * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); if (!rc) diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 36d5295c2a6f92f1e26acbd0d2b94226b0c46452..0f0c10c7ada73f1ae68c8ef929ed3696679705ac 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -206,6 +206,116 @@ DEFINE_SMB3_OTHER_ERR_EVENT(query_dir_err); DEFINE_SMB3_OTHER_ERR_EVENT(zero_err); DEFINE_SMB3_OTHER_ERR_EVENT(falloc_err); +/* + * For logging errors in reflink and copy_range ops e.g. smb2_copychunk_range + * and smb2_duplicate_extents + */ +DECLARE_EVENT_CLASS(smb3_copy_range_err_class, + TP_PROTO(unsigned int xid, + __u64 src_fid, + __u64 target_fid, + __u32 tid, + __u64 sesid, + __u64 src_offset, + __u64 target_offset, + __u32 len, + int rc), + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len, rc), + TP_STRUCT__entry( + __field(unsigned int, xid) + __field(__u64, src_fid) + __field(__u64, target_fid) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, src_offset) + __field(__u64, target_offset) + __field(__u32, len) + __field(int, rc) + ), + TP_fast_assign( + __entry->xid = xid; + __entry->src_fid = src_fid; + __entry->target_fid = target_fid; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->src_offset = src_offset; + __entry->target_offset = target_offset; + __entry->len = len; + __entry->rc = rc; + ), + TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", + __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, + __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc) +) + +#define DEFINE_SMB3_COPY_RANGE_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_copy_range_err_class, smb3_##name, \ + TP_PROTO(unsigned int xid, \ + __u64 src_fid, \ + __u64 target_fid, \ + __u32 tid, \ + __u64 sesid, \ + __u64 src_offset, \ + __u64 target_offset, \ + __u32 len, \ + int rc), \ + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len, rc)) + +DEFINE_SMB3_COPY_RANGE_ERR_EVENT(clone_err); +/* TODO: Add SMB3_COPY_RANGE_ERR_EVENT(copychunk_err) */ + +DECLARE_EVENT_CLASS(smb3_copy_range_done_class, + TP_PROTO(unsigned int xid, + __u64 src_fid, + __u64 target_fid, + __u32 tid, + __u64 sesid, + __u64 src_offset, + __u64 target_offset, + __u32 len), + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len), + TP_STRUCT__entry( + __field(unsigned int, xid) + __field(__u64, src_fid) + __field(__u64, target_fid) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, src_offset) + __field(__u64, target_offset) + __field(__u32, len) + ), + TP_fast_assign( + __entry->xid = xid; + __entry->src_fid = src_fid; + __entry->target_fid = target_fid; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->src_offset = src_offset; + __entry->target_offset = target_offset; + __entry->len = len; + ), + TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", + __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, + __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len) +) + +#define DEFINE_SMB3_COPY_RANGE_DONE_EVENT(name) \ +DEFINE_EVENT(smb3_copy_range_done_class, smb3_##name, \ + TP_PROTO(unsigned int xid, \ + __u64 src_fid, \ + __u64 target_fid, \ + __u32 tid, \ + __u64 sesid, \ + __u64 src_offset, \ + __u64 target_offset, \ + __u32 len), \ + TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len)) + +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(copychunk_enter); +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(clone_enter); +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(copychunk_done); +DEFINE_SMB3_COPY_RANGE_DONE_EVENT(clone_done); + /* For logging successful read or write */ DECLARE_EVENT_CLASS(smb3_rw_done_class, @@ -1171,6 +1281,46 @@ DEFINE_EVENT(smb3_connect_err_class, smb3_##name, \ DEFINE_SMB3_CONNECT_ERR_EVENT(connect_err); +DECLARE_EVENT_CLASS(smb3_sess_setup_err_class, + TP_PROTO(char *hostname, char *username, __u64 conn_id, + const struct __kernel_sockaddr_storage *dst_addr, int rc), + TP_ARGS(hostname, username, conn_id, dst_addr, rc), + TP_STRUCT__entry( + __string(hostname, hostname) + __string(username, username) + __field(__u64, conn_id) + __array(__u8, dst_addr, sizeof(struct sockaddr_storage)) + __field(int, rc) + ), + TP_fast_assign( + struct sockaddr_storage *pss = NULL; + + __entry->conn_id = conn_id; + __entry->rc = rc; + pss = (struct sockaddr_storage *)__entry->dst_addr; + *pss = *dst_addr; + __assign_str(hostname); + __assign_str(username); + ), + TP_printk("rc=%d user=%s conn_id=0x%llx server=%s addr=%pISpsfc", + __entry->rc, + __get_str(username), + __entry->conn_id, + __get_str(hostname), + __entry->dst_addr) +) + +#define DEFINE_SMB3_SES_SETUP_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_sess_setup_err_class, smb3_##name, \ + TP_PROTO(char *hostname, \ + char *username, \ + __u64 conn_id, \ + const struct __kernel_sockaddr_storage *addr, \ + int rc), \ + TP_ARGS(hostname, username, conn_id, addr, rc)) + +DEFINE_SMB3_SES_SETUP_ERR_EVENT(key_expired); + DECLARE_EVENT_CLASS(smb3_reconnect_class, TP_PROTO(__u64 currmid, __u64 conn_id, @@ -1238,7 +1388,7 @@ DECLARE_EVENT_CLASS(smb3_ioctl_class, __entry->command = command; ), TP_printk("xid=%u fid=0x%llx ioctl cmd=0x%x", - __entry->xid, __entry->fid, __entry->command) + __entry->xid, __entry->fid, __entry->command) ) #define DEFINE_SMB3_IOCTL_EVENT(name) \ @@ -1250,9 +1400,58 @@ DEFINE_EVENT(smb3_ioctl_class, smb3_##name, \ DEFINE_SMB3_IOCTL_EVENT(ioctl); +DECLARE_EVENT_CLASS(smb3_shutdown_class, + TP_PROTO(__u32 flags, + __u32 tid), + TP_ARGS(flags, tid), + TP_STRUCT__entry( + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("flags=0x%x tid=0x%x", + __entry->flags, __entry->tid) +) + +#define DEFINE_SMB3_SHUTDOWN_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_class, smb3_##name, \ + TP_PROTO(__u32 flags, \ + __u32 tid), \ + TP_ARGS(flags, tid)) + +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_enter); +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_done); +DECLARE_EVENT_CLASS(smb3_shutdown_err_class, + TP_PROTO(int rc, + __u32 flags, + __u32 tid), + TP_ARGS(rc, flags, tid), + TP_STRUCT__entry( + __field(int, rc) + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->rc = rc; + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("rc=%d flags=0x%x tid=0x%x", + __entry->rc, __entry->flags, __entry->tid) +) +#define DEFINE_SMB3_SHUTDOWN_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_err_class, smb3_##name, \ + TP_PROTO(int rc, \ + __u32 flags, \ + __u32 tid), \ + TP_ARGS(rc, flags, tid)) +DEFINE_SMB3_SHUTDOWN_ERR_EVENT(shutdown_err); DECLARE_EVENT_CLASS(smb3_credit_class, TP_PROTO(__u64 currmid, diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index adfe0d05870108ab639f6cf8d5760d6f525146b6..6e68aaf5bd20357e35b0aa090ca75acc9f3b4017 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, out: /* * This will dequeue all mids. After this it is important that the - * demultiplex_thread will not process any of these mids any futher. + * demultiplex_thread will not process any of these mids any further. * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c3ee42188d252ecb851e3c9fa64a7ba34d4f5278..c769f9dbc0b467929cb9877817d61720a7349d76 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1216,6 +1216,8 @@ struct create_context { ); __u8 Buffer[]; } __packed; +static_assert(offsetof(struct create_context, Buffer) == sizeof(struct create_context_hdr), + "struct member likely outside of __struct_group()"); struct smb2_create_req { struct smb2_hdr hdr; diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 09e1e7771592f522e44e309505078e04b6853cad..7889df8112b4eecf6b0ee91d2c04ec84e3a54326 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) up_read(&conn_list_lock); } -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn) { wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); } +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id) +{ + struct ksmbd_conn *conn; + int rc, retry_count = 0, max_timeout = 120; + int rcount = 1; + +retry_idle: + if (retry_count >= max_timeout) + return -EIO; + + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) { + if (conn->binding || xa_load(&conn->sessions, sess_id)) { + if (conn == curr_conn) + rcount = 2; + if (atomic_read(&conn->req_running) >= rcount) { + rc = wait_event_timeout(conn->req_running_q, + atomic_read(&conn->req_running) < rcount, + HZ); + if (!rc) { + up_read(&conn_list_lock); + retry_count++; + goto retry_idle; + } + } + } + } + up_read(&conn_list_lock); + + return 0; +} + int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 5c2845e47cf2df5f33bba86bfb3e88ae1917635f..5b947175c048eb77597dfab486f60dd183d0d329 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -145,7 +145,8 @@ extern struct list_head conn_list; extern struct rw_semaphore conn_list_lock; bool ksmbd_conn_alive(struct ksmbd_conn *conn); -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id); +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index e0a6b758094fc57c5fd47d0be2ab9f4d1e94ee70..d8d03070ae44b46dab1505227f617218d09562e2 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -15,6 +15,7 @@ #include "share_config.h" #include "user_config.h" #include "user_session.h" +#include "../connection.h" #include "../transport_ipc.h" #include "../misc.h" @@ -120,12 +121,13 @@ static int parse_veto_list(struct ksmbd_share_config *share, return 0; } -static struct ksmbd_share_config *share_config_request(struct unicode_map *um, +static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config_response *resp; struct ksmbd_share_config *share = NULL; struct ksmbd_share_config *lookup; + struct unicode_map *um = work->conn->um; int ret; resp = ksmbd_ipc_share_config_request(name); @@ -181,7 +183,14 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, KSMBD_SHARE_CONFIG_VETO_LIST(resp), resp->veto_list_sz); if (!ret && share->path) { + if (__ksmbd_override_fsids(work, share)) { + kill_share(share); + share = NULL; + goto out; + } + ret = kern_path(share->path, 0, &share->vfs_path); + ksmbd_revert_fsids(work); if (ret) { ksmbd_debug(SMB, "failed to access '%s'\n", share->path); @@ -214,7 +223,7 @@ out: return share; } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config *share; @@ -227,7 +236,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, if (share) return share; - return share_config_request(um, name); + return share_config_request(work, name); } bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, diff --git a/fs/smb/server/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h index 5f591751b92365eac17d7e391ccdd57bc0a2e9b6..d4ac2dd4de204053e62634a2149fc588b9fad77f 100644 --- a/fs/smb/server/mgmt/share_config.h +++ b/fs/smb/server/mgmt/share_config.h @@ -11,6 +11,8 @@ #include #include +struct ksmbd_work; + struct ksmbd_share_config { char *name; char *path; @@ -68,7 +70,7 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share) __ksmbd_share_config_put(share); } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name); bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, const char *filename); diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index d2c81a8a11dda10ef710bc97e1c059073fddb700..94a52a75014a4384fb80fbc461f3f558341c4f55 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -16,17 +16,18 @@ #include "user_session.h" struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name) +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name) { struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; struct ksmbd_tree_connect_response *resp = NULL; struct ksmbd_share_config *sc; struct ksmbd_tree_connect *tree_conn = NULL; struct sockaddr *peer_addr; + struct ksmbd_conn *conn = work->conn; + struct ksmbd_session *sess = work->sess; int ret; - sc = ksmbd_share_config_get(conn->um, share_name); + sc = ksmbd_share_config_get(work, share_name); if (!sc) return status; @@ -61,7 +62,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, struct ksmbd_share_config *new_sc; ksmbd_share_config_del(sc); - new_sc = ksmbd_share_config_get(conn->um, share_name); + new_sc = ksmbd_share_config_get(work, share_name); if (!new_sc) { pr_err("Failed to update stale share config\n"); status.ret = -ESTALE; diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h index 6377a70b811c8993575d96889a8c2ef5f2fada2c..a42cdd0510411437fe074ee3359a648496a36b13 100644 --- a/fs/smb/server/mgmt/tree_connect.h +++ b/fs/smb/server/mgmt/tree_connect.h @@ -13,6 +13,7 @@ struct ksmbd_share_config; struct ksmbd_user; struct ksmbd_conn; +struct ksmbd_work; enum { TREE_NEW = 0, @@ -50,8 +51,7 @@ static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn, struct ksmbd_session; struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name); +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name); void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon); int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 162a12685d2c956222eae5438c8887afe3711c20..99416ce9f50183dec9bb1da9fc495b0255396c0c 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, { struct ksmbd_session *prev_sess; struct ksmbd_user *prev_user; + int err; down_write(&sessions_table_lock); down_write(&conn->session_lock); @@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn, memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) goto out; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); + err = ksmbd_conn_wait_idle_sess_id(conn, id); + if (err) { + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + goto out; + } + ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 37a39ab4ee6543fbec2adb6dcb5fdfef93492e8d..0bc9edf22ba4076b731e67d411173b2670687170 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work, } sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, + spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } @@ -1955,7 +1958,7 @@ int smb2_tree_connect(struct ksmbd_work *work) ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n", name, treename); - status = ksmbd_tree_conn_connect(conn, sess, name); + status = ksmbd_tree_conn_connect(work, name); if (status.ret == KSMBD_TREE_CONN_STATUS_OK) rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id); else @@ -2210,7 +2213,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_conn_unlock(conn); ksmbd_close_session_fds(work); - ksmbd_conn_wait_idle(conn, sess_id); + ksmbd_conn_wait_idle(conn); /* * Re-lookup session to validate if session is deleted @@ -5357,7 +5360,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, "NTFS", PATH_MAX, conn->local_nls, 0); len = len * 2; info->FileSystemNameLen = cpu_to_le32(len); - sz = sizeof(struct filesystem_attribute_info) - 2 + len; + sz = sizeof(struct filesystem_attribute_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5383,7 +5386,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, len = len * 2; info->VolumeLabelSize = cpu_to_le32(len); info->Reserved = 0; - sz = sizeof(struct filesystem_vol_info) - 2 + len; + sz = sizeof(struct filesystem_vol_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5596,6 +5599,11 @@ int smb2_query_info(struct ksmbd_work *work) ksmbd_debug(SMB, "GOT query info request\n"); + if (ksmbd_override_fsids(work)) { + rc = -ENOMEM; + goto err_out; + } + switch (req->InfoType) { case SMB2_O_INFO_FILE: ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n"); @@ -5614,6 +5622,7 @@ int smb2_query_info(struct ksmbd_work *work) req->InfoType); rc = -EOPNOTSUPP; } + ksmbd_revert_fsids(work); if (!rc) { rsp->StructureSize = cpu_to_le16(9); @@ -5623,6 +5632,7 @@ int smb2_query_info(struct ksmbd_work *work) le32_to_cpu(rsp->OutputBufferLength)); } +err_out: if (rc < 0) { if (rc == -EACCES) rsp->hdr.Status = STATUS_ACCESS_DENIED; diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 474dadf6b7b8bc51b3bfa94c6d29f02621f21cd5..13818ecb6e1b2fb24d8424d0bc70083b810adfe0 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -732,10 +732,10 @@ bool is_asterisk(char *p) return p && p[0] == '*'; } -int ksmbd_override_fsids(struct ksmbd_work *work) +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share) { struct ksmbd_session *sess = work->sess; - struct ksmbd_share_config *share = work->tcon->share_conf; struct cred *cred; struct group_info *gi; unsigned int uid; @@ -775,6 +775,11 @@ int ksmbd_override_fsids(struct ksmbd_work *work) return 0; } +int ksmbd_override_fsids(struct ksmbd_work *work) +{ + return __ksmbd_override_fsids(work, work->tcon->share_conf); +} + void ksmbd_revert_fsids(struct ksmbd_work *work) { const struct cred *cred; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index f1092519c0c288223b1ad7d2e66e9545fc5bcac3..cc1d6dfe29d56534e86ae24128dcc2464506e6e1 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -213,7 +213,7 @@ struct filesystem_attribute_info { __le32 Attributes; __le32 MaxPathNameComponentLength; __le32 FileSystemNameLen; - __le16 FileSystemName[1]; /* do not have to save this - get subset? */ + __le16 FileSystemName[]; /* do not have to save this - get subset? */ } __packed; struct filesystem_device_info { @@ -226,7 +226,7 @@ struct filesystem_vol_info { __le32 SerialNumber; __le32 VolumeLabelSize; __le16 Reserved; - __le16 VolumeLabel[1]; + __le16 VolumeLabel[]; } __packed; struct filesystem_info { @@ -447,6 +447,8 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command); int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp); +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share); int ksmbd_override_fsids(struct ksmbd_work *work); void ksmbd_revert_fsids(struct ksmbd_work *work); diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 16bd693d0b3aa23ce87af9cc1540e113a4c2a286..d5918eba27e3711f62d7b4b8a1d7972f22ac6ca5 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino) if (err < 0) goto failed_read; - set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + if (inode->i_size > PAGE_SIZE) { + ERROR("Corrupted symlink\n"); + return -EINVAL; + } + + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_op = &squashfs_symlink_inode_ops; inode_nohighmem(inode); inode->i_data.a_ops = &squashfs_symlink_aops; diff --git a/fs/super.c b/fs/super.c index 095ba793e10cf9b487348ec0df66dd47d4038a17..38d72a3cf6fcf88560f13faae8c2452de1fbad8c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -736,6 +736,17 @@ struct super_block *sget_fc(struct fs_context *fc, struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns; int err; + /* + * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is + * not set, as the filesystem is likely unprepared to handle it. + * This can happen when fsconfig() is called from init_user_ns with + * an fs_fd opened in another user namespace. + */ + if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) { + errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed"); + return ERR_PTR(-EPERM); + } + retry: spin_lock(&sb_lock); if (test) { diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 5d88c184f0fc15321993577eca50325d78db0b68..01e99e98457ddbf35f04f9702ab0f8d15f81f35f 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -112,7 +112,7 @@ static void release_ei(struct kref *ref) entry->release(entry->name, ei->data); } - call_rcu(&ei->rcu, free_ei_rcu); + call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) @@ -736,7 +736,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { cleanup_ei(ei); - ei = NULL; + ei = ERR_PTR(-EBUSY); } return ei; } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1028ab6d9a744b7dde0195b9c5e090b6e8ea2adc..1748dff58c3bc95e1b147d3501fad023f5462f9b 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) struct tracefs_inode *ti; unsigned long flags; - ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); + ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; @@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) return &ti->vfs_inode; } -static void tracefs_free_inode_rcu(struct rcu_head *rcu) +static void tracefs_free_inode(struct inode *inode) { - struct tracefs_inode *ti; + struct tracefs_inode *ti = get_tracefs(inode); - ti = container_of(rcu, struct tracefs_inode, rcu); kmem_cache_free(tracefs_inode_cachep, ti); } -static void tracefs_free_inode(struct inode *inode) +static void tracefs_destroy_inode(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); unsigned long flags; @@ -69,8 +68,6 @@ static void tracefs_free_inode(struct inode *inode) spin_lock_irqsave(&tracefs_inode_lock, flags); list_del_rcu(&ti->list); spin_unlock_irqrestore(&tracefs_inode_lock, flags); - - call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -437,6 +434,7 @@ static int tracefs_drop_inode(struct inode *inode) static const struct super_operations tracefs_super_operations = { .alloc_inode = tracefs_alloc_inode, .free_inode = tracefs_free_inode, + .destroy_inode = tracefs_destroy_inode, .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, .show_options = tracefs_show_options, diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index f704d8348357ed8f8a2a11195718b716e7277244..d83c2a25f288e0fbdc2e47de9b711cddf17746c6 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -10,10 +10,7 @@ enum { }; struct tracefs_inode { - union { - struct inode vfs_inode; - struct rcu_head rcu; - }; + struct inode vfs_inode; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ struct list_head list; unsigned long flags; diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 75461777c466b1c93c24cbf05ae942bf0beae573..0b48cbab8a3da9f78b99695c063926a8be8d8cec 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -82,6 +82,7 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /** * ubifs_compress - compress data. + * @c: UBIFS file-system description object * @in_buf: data to compress * @in_len: length of the data to compress * @out_buf: output buffer where compressed data should be stored @@ -140,6 +141,7 @@ no_compr: /** * ubifs_decompress - decompress data. + * @c: UBIFS file-system description object * @in_buf: data to decompress * @in_len: length of the data to decompress * @out_buf: output buffer where decompressed data should diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ac77ac1fd73e351863c0d7ab98f86c1f12a82b9f..d91cec93d9689a1d28f29d31e5869b0f6161e4d0 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2827,9 +2827,9 @@ void dbg_debugfs_init_fs(struct ubifs_info *c) const char *fname; struct ubifs_debug_info *d = c->dbg; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n > UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ return; } diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index ed966108da806793f6f6991d9dff67d573709907..d425861e6b821f769ee904bd368455528937813a 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -19,10 +19,11 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, /* * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + * + 1 for "_" and 2 for UBI device numbers and 3 for volume number and 1 for + * the trailing zero byte. */ #define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2 + 3 + 1) /** * ubifs_debug_info - per-FS debugging information. @@ -103,7 +104,7 @@ struct ubifs_debug_info { unsigned int chk_fs:1; unsigned int tst_rcvry:1; - char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index eac0fef801f1433df255713591c12a1e849b1385..c77ea57fe69696cc8ec1037f5f65e91a8b8b89c5 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -71,8 +71,13 @@ static int inherit_flags(const struct inode *dir, umode_t mode) * @is_xattr: whether the inode is xattr inode * * This function finds an unused inode number, allocates new inode and - * initializes it. Returns new inode in case of success and an error code in - * case of failure. + * initializes it. Non-xattr new inode may be written with xattrs(selinux/ + * encryption) before writing dentry, which could cause inconsistent problem + * when powercut happens between two operations. To deal with it, non-xattr + * new inode is initialized with zero-nlink and added into orphan list, caller + * should make sure that inode is relinked later, and make sure that orphan + * removing and journal writing into an committing atomic operation. Returns + * new inode in case of success and an error code in case of failure. */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode, bool is_xattr) @@ -163,9 +168,25 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, ui->creat_sqnum = ++c->max_sqnum; spin_unlock(&c->cnt_lock); + if (!is_xattr) { + set_nlink(inode, 0); + err = ubifs_add_orphan(c, inode->i_ino); + if (err) { + ubifs_err(c, "ubifs_add_orphan failed: %i", err); + goto out_iput; + } + down_read(&c->commit_sem); + ui->del_cmtno = c->cmt_no; + up_read(&c->commit_sem); + } + if (encrypted) { err = fscrypt_set_context(inode, NULL); if (err) { + if (!is_xattr) { + set_nlink(inode, 1); + ubifs_delete_orphan(c, inode->i_ino); + } ubifs_err(c, "fscrypt_set_context failed: %i", err); goto out_iput; } @@ -320,12 +341,13 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -340,8 +362,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -386,7 +408,6 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) return inode; out_inode: - make_bad_inode(inode); iput(inode); out_free: ubifs_err(c, "cannot create whiteout file, error %d", err); @@ -470,6 +491,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&ui->ui_mutex); insert_inode_hash(inode); d_tmpfile(file, inode); @@ -479,7 +501,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, mutex_unlock(&ui->ui_mutex); lock_2_inodes(dir, inode); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -492,7 +514,6 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, out_cancel: unlock_2_inodes(dir, inode); out_inode: - make_bad_inode(inode); if (!instantiated) iput(inode); out_budg: @@ -760,10 +781,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); - /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ - if (inode->i_nlink == 0) - ubifs_delete_orphan(c, inode->i_ino); - inc_nlink(inode); ihold(inode); inode_set_ctime_current(inode); @@ -771,7 +788,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, inode->i_nlink == 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -785,8 +802,6 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); - if (inode->i_nlink == 0) - ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); @@ -846,7 +861,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -950,7 +965,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -1017,6 +1032,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); insert_inode_hash(inode); inc_nlink(inode); @@ -1025,7 +1041,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) { ubifs_err(c, "cannot create directory, error %d", err); goto out_cancel; @@ -1042,8 +1058,8 @@ out_cancel: dir_ui->ui_size = dir->i_size; drop_nlink(dir); mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1102,22 +1118,25 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) { + kfree(dev); + goto out_inode; + } + init_special_inode(inode, inode->i_mode, rdev); inode->i_size = ubifs_inode(inode)->ui_size = devlen; ui = ubifs_inode(inode); ui->data = dev; ui->data_len = devlen; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1132,10 +1151,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - /* Free inode->i_link before inode is marked as bad. */ - fscrypt_free_inode(inode); - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1186,6 +1203,10 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + ui = ubifs_inode(inode); ui->data = kmalloc(disk_link.len, GFP_NOFS); if (!ui->data) { @@ -1210,17 +1231,14 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, */ ui->data_len = disk_link.len - 1; inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1234,10 +1252,10 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: /* Free inode->i_link before inode is marked as bad. */ fscrypt_free_inode(inode); - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1405,14 +1423,10 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, */ err = ubifs_budget_space(c, &wht_req); if (err) { - /* - * Whiteout inode can not be written on flash by - * ubifs_jnl_write_inode(), because it's neither - * dirty nor zero-nlink. - */ iput(whiteout); goto out_release; } + set_nlink(whiteout, 1); /* Add the old_dentry size to the old_dir size. */ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); @@ -1491,7 +1505,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, } err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, - new_inode, &new_nm, whiteout, sync); + new_inode, &new_nm, whiteout, sync, !!whiteout); if (err) goto out_cancel; @@ -1544,6 +1558,7 @@ out_cancel: unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); if (whiteout) { ubifs_release_budget(c, &wht_req); + set_nlink(whiteout, 0); iput(whiteout); } out_release: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a1f46919934c7d02f1d5f68e84dfdd0d860f8d86..68e104423a48237d506c93d7fc953ebc2d2789f0 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1027,7 +1027,7 @@ static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc, /* Is the folio fully inside i_size? */ if (folio_pos(folio) + len <= i_size) { - if (folio_pos(folio) >= synced_i_size) { + if (folio_pos(folio) + len > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_redirty; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 6ebf3c04ac5feaa75185b2ee6025b8748667140b..643718906b9f06c5322a8cd990d14b69b1d6443b 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -73,7 +73,7 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -340,7 +340,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -581,7 +581,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -773,7 +773,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 74aee92433d75c2fe0880e351519e00a85b1729a..4a35f9e8f6681794740f1bbdf12fd8781188b28f 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -359,7 +359,7 @@ static void wake_up_reservation(struct ubifs_info *c) } /** - * wake_up_reservation - add current task in queue or start queuing. + * add_or_start_queue - add current task in queue or start queuing. * @c: UBIFS file-system description object * * This function starts queuing if queuing is not started, otherwise adds @@ -643,6 +643,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) * @inode: inode to update * @deletion: indicates a directory entry deletion i.e unlink or rmdir * @xent: non-zero if the directory entry is an extended attribute entry + * @in_orphan: indicates whether the @inode is in orphan list * * This function updates an inode by writing a directory entry (or extended * attribute entry), the inode itself, and the parent directory inode (or the @@ -664,7 +665,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent) + int deletion, int xent, int in_orphan) { int err, dlen, ilen, len, lnum, ino_offs, dent_offs, orphan_added = 0; int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); @@ -750,7 +751,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_release; - if (last_reference) { + if (last_reference && !in_orphan) { err = ubifs_add_orphan(c, inode->i_ino); if (err) { release_head(c, BASEHD); @@ -806,6 +807,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_ro; + if (in_orphan && inode->i_nlink) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; @@ -1336,6 +1340,7 @@ out_free: * @new_nm: new name of the new directory entry * @whiteout: whiteout inode * @sync: non-zero if the write-buffer has to be synchronized + * @delete_orphan: indicates an orphan entry deletion for @whiteout * * This function implements the re-name operation which may involve writing up * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) @@ -1348,7 +1353,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync) + const struct inode *whiteout, int sync, int delete_orphan) { void *p; union ubifs_key key; @@ -1565,6 +1570,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, goto out_ro; } + if (delete_orphan) + ubifs_delete_orphan(c, whiteout->i_ino); + finish_reservation(c); if (new_inode) { mark_inode_clean(c, new_ui); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index a11c3dab7e168c9e4dd96a9df93c758ea0fb2006..8788740ec57f46238fe4b897fad4d9e15ad9e703 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1005,7 +1005,7 @@ out: * @c: the UBIFS file-system description object * @lp: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @lst: lprops statistics to update + * @arg: lprops statistics to update * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 778a22bf9a92fb6a1cc467cff5919f216cf4b619..441d0beca4cf3e82ffa813d6691211637def50af 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1918,6 +1918,7 @@ out_err: * @pnode: where to keep a pnode * @cnode: where to keep a cnode * @in_tree: is the node in the tree in memory + * @ptr: union of node pointers * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in * the tree * @ptr.pnode: ditto for pnode diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 7adc37c10b6af48af4f4b82faa9763b3b8e342f8..a148760fa49ed46e910b1af654f0094801e45037 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -67,10 +67,13 @@ static int mst_node_check_hash(const struct ubifs_info *c, { u8 calc[UBIFS_MAX_HASH_LEN]; const void *node = mst; + int ret; - crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), + ret = crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc); + if (ret) + return ret; if (ubifs_check_hash(c, expected, calc)) return -EPERM; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 4909321d84cf83ccd84bb0a5396dbbf1a6a24b71..fb957d963ba6c0edcb9a0a6eca2009309bbdb0da 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -42,24 +42,30 @@ static int dbg_check_orphans(struct ubifs_info *c); -static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, - struct ubifs_orphan *parent_orphan) +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) { struct ubifs_orphan *orphan, *o; struct rb_node **p, *parent = NULL; orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); if (!orphan) - return ERR_PTR(-ENOMEM); + return -ENOMEM; orphan->inum = inum; orphan->new = 1; - INIT_LIST_HEAD(&orphan->child_list); spin_lock(&c->orphan_lock); if (c->tot_orphans >= c->max_orphans) { spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-ENFILE); + return -ENFILE; } p = &c->orph_tree.rb_node; while (*p) { @@ -73,7 +79,7 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, ubifs_err(c, "orphaned twice"); spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-EINVAL); + return -EINVAL; } } c->tot_orphans += 1; @@ -83,14 +89,9 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, list_add_tail(&orphan->list, &c->orph_list); list_add_tail(&orphan->new_list, &c->orph_new); - if (parent_orphan) { - list_add_tail(&orphan->child_list, - &parent_orphan->child_list); - } - spin_unlock(&c->orphan_lock); dbg_gen("ino %lu", (unsigned long)inum); - return orphan; + return 0; } static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum) @@ -135,6 +136,7 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) if (orph->cmt) { orph->del = 1; + rb_erase(&orph->rb, &c->orph_tree); orph->dnext = c->orph_dnext; c->orph_dnext = orph; dbg_gen("delete later ino %lu", (unsigned long)orph->inum); @@ -144,59 +146,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) __orphan_drop(c, orph); } -/** - * ubifs_add_orphan - add an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * Add an orphan. This function is called when an inodes link count drops to - * zero. - */ -int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) -{ - int err = 0; - ino_t xattr_inum; - union ubifs_key key; - struct ubifs_dent_node *xent, *pxent = NULL; - struct fscrypt_name nm = {0}; - struct ubifs_orphan *xattr_orphan; - struct ubifs_orphan *orphan; - - orphan = orphan_add(c, inum, NULL); - if (IS_ERR(orphan)) - return PTR_ERR(orphan); - - lowest_xent_key(c, &key, inum); - while (1) { - xent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(xent)) { - err = PTR_ERR(xent); - if (err == -ENOENT) - break; - kfree(pxent); - return err; - } - - fname_name(&nm) = xent->name; - fname_len(&nm) = le16_to_cpu(xent->nlen); - xattr_inum = le64_to_cpu(xent->inum); - - xattr_orphan = orphan_add(c, xattr_inum, orphan); - if (IS_ERR(xattr_orphan)) { - kfree(pxent); - kfree(xent); - return PTR_ERR(xattr_orphan); - } - - kfree(pxent); - pxent = xent; - key_read(c, &xent->key, &key); - } - kfree(pxent); - - return 0; -} - /** * ubifs_delete_orphan - delete an orphan. * @c: UBIFS file-system description object @@ -206,7 +155,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) */ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) { - struct ubifs_orphan *orph, *child_orph, *tmp_o; + struct ubifs_orphan *orph; spin_lock(&c->orphan_lock); @@ -219,11 +168,6 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) return; } - list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { - list_del(&child_orph->child_list); - orphan_delete(c, child_orph); - } - orphan_delete(c, orph); spin_unlock(&c->orphan_lock); @@ -518,7 +462,6 @@ static void erase_deleted(struct ubifs_info *c) dnext = orphan->dnext; ubifs_assert(c, !orphan->new); ubifs_assert(c, orphan->del); - rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); @@ -570,51 +513,6 @@ int ubifs_clear_orphans(struct ubifs_info *c) return 0; } -/** - * insert_dead_orphan - insert an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * This function is a helper to the 'do_kill_orphans()' function. The orphan - * must be kept until the next commit, so it is added to the rb-tree and the - * deletion list. - */ -static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) -{ - struct ubifs_orphan *orphan, *o; - struct rb_node **p, *parent = NULL; - - orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); - if (!orphan) - return -ENOMEM; - orphan->inum = inum; - - p = &c->orph_tree.rb_node; - while (*p) { - parent = *p; - o = rb_entry(parent, struct ubifs_orphan, rb); - if (inum < o->inum) - p = &(*p)->rb_left; - else if (inum > o->inum) - p = &(*p)->rb_right; - else { - /* Already added - no problem */ - kfree(orphan); - return 0; - } - } - c->tot_orphans += 1; - rb_link_node(&orphan->rb, parent, p); - rb_insert_color(&orphan->rb, &c->orph_tree); - list_add_tail(&orphan->list, &c->orph_list); - orphan->del = 1; - orphan->dnext = c->orph_dnext; - c->orph_dnext = orphan; - dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, - c->new_orphans, c->tot_orphans); - return 0; -} - /** * do_kill_orphans - remove orphan inodes from the index. * @c: UBIFS file-system description object @@ -691,12 +589,12 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; for (i = 0; i < n; i++) { - union ubifs_key key1, key2; + union ubifs_key key; inum = le64_to_cpu(orph->inos[i]); - ino_key_init(c, &key1, inum); - err = ubifs_tnc_lookup(c, &key1, ino); + ino_key_init(c, &key, inum); + err = ubifs_tnc_lookup(c, &key, ino); if (err && err != -ENOENT) goto out_free; @@ -708,17 +606,10 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, dbg_rcvry("deleting orphaned inode %lu", (unsigned long)inum); - lowest_ino_key(c, &key1, inum); - highest_ino_key(c, &key2, inum); - - err = ubifs_tnc_remove_range(c, &key1, &key2); + err = ubifs_tnc_remove_ino(c, inum); if (err) goto out_ro; } - - err = insert_dead_orphan(c, inum); - if (err) - goto out_free; } *last_cmt_no = cmt_no; @@ -925,8 +816,12 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, inum = key_inum(c, &zbr->key); if (inum != ci->last_ino) { - /* Lowest node type is the inode node, so it comes first */ - if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + /* + * Lowest node type is the inode node or xattr entry(when + * selinux/encryption is enabled), so it comes first + */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY && + key_type(c, &zbr->key) != UBIFS_XENT_KEY) ubifs_err(c, "found orphan node ino %lu, type %d", (unsigned long)inum, key_type(c, &zbr->key)); ci->last_ino = inum; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 17da28d6247ace77b6168853a32b4771d141d6d8..a950c5f2560e367e13edde3ff681146e33ee1a01 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -29,6 +29,7 @@ * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @hash: node hash * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number * @list: links the replay list diff --git a/fs/ubifs/sysfs.c b/fs/ubifs/sysfs.c index 1c958148bb877f7ba6d9c562945798b363d6bd55..aae32222f11b37823ae424158881c651e52d6a0a 100644 --- a/fs/ubifs/sysfs.c +++ b/fs/ubifs/sysfs.c @@ -91,17 +91,17 @@ static struct kset ubifs_kset = { int ubifs_sysfs_register(struct ubifs_info *c) { int ret, n; - char dfs_dir_name[UBIFS_DFS_DIR_LEN+1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; c->stats = kzalloc(sizeof(struct ubifs_stats_info), GFP_KERNEL); if (!c->stats) { ret = -ENOMEM; goto out_last; } - n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n > UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ ret = -EINVAL; goto out_free; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1f3ea879d93afb1a9a1f7638f8df3b71ba0b10b2..d69a5a42d6936c32665496ab0a40a47c22365a5b 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -157,13 +157,6 @@ #define UBIFS_HMAC_ARR_SZ 0 #endif -/* - * The UBIFS sysfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. - */ -#define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) - /* * Lockdep classes for UBIFS inode @ui_mutex. */ @@ -923,8 +916,6 @@ struct ubifs_budget_req { * @rb: rb-tree node of rb-tree of orphans sorted by inode number * @list: list head of list of orphans in order added * @new_list: list head of list of orphans added since the last commit - * @child_list: list of xattr children if this orphan hosts xattrs, list head - * if this orphan is a xattr, not used otherwise. * @cnext: next orphan to commit * @dnext: next orphan to delete * @inum: inode number @@ -936,7 +927,6 @@ struct ubifs_orphan { struct rb_node rb; struct list_head list; struct list_head new_list; - struct list_head child_list; struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; @@ -1803,7 +1793,7 @@ int ubifs_consolidate_log(struct ubifs_info *c); /* journal.c */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent); + int deletion, int xent, int in_orphan); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, const union ubifs_key *key, const void *buf, int len); int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); @@ -1820,7 +1810,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync); + const struct inode *whiteout, int sync, int delete_orphan); int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, loff_t old_size, loff_t new_size); int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 0847db521984ac98a4de135e984c47ed703837d0..f734588b224a31838cc9830093603c4dff9c377a 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -149,7 +149,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, if (strcmp(fname_name(nm), UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT) == 0) host_ui->flags |= UBIFS_CRYPT_FL; - err = ubifs_jnl_update(c, host, nm, inode, 0, 1); + err = ubifs_jnl_update(c, host, nm, inode, 0, 1, 0); if (err) goto out_cancel; ubifs_set_inode_flags(host); diff --git a/fs/unicode/mkutf8data.c b/fs/unicode/mkutf8data.c index bc1a7c8b5c8dfc10b723fd7fa696c8a287aada38..77b685db827511a4fac03142370a25dbb104c768 100644 --- a/fs/unicode/mkutf8data.c +++ b/fs/unicode/mkutf8data.c @@ -3352,6 +3352,7 @@ static void write_file(void) fprintf(file, "};\n"); fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);"); fprintf(file, "\n"); + fprintf(file, "MODULE_DESCRIPTION(\"UTF8 data table\");\n"); fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n"); fclose(file); } diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c index eb2bbdd688d717e36f57f7e84802ace6e9b9c9f7..600e15efe9edfdf6d04cecd162e84f1f5a59c5e1 100644 --- a/fs/unicode/utf8-selftest.c +++ b/fs/unicode/utf8-selftest.c @@ -14,8 +14,8 @@ #include "utf8n.h" -unsigned int failed_tests; -unsigned int total_tests; +static unsigned int failed_tests; +static unsigned int total_tests; /* Tests will be based on this version. */ #define UTF8_LATEST UNICODE_AGE(12, 1, 0) @@ -307,4 +307,5 @@ module_init(init_test_ucd); module_exit(exit_test_ucd); MODULE_AUTHOR("Gabriel Krisman Bertazi "); +MODULE_DESCRIPTION("Kernel module for testing utf-8 support"); MODULE_LICENSE("GPL"); diff --git a/fs/unicode/utf8data.c_shipped b/fs/unicode/utf8data.c_shipped index d9b62901aa96b76d727318950ef6cbc4c0e37290..dafa5fed761d83bc5f9f1390c61b28de6e4a43a7 100644 --- a/fs/unicode/utf8data.c_shipped +++ b/fs/unicode/utf8data.c_shipped @@ -4120,4 +4120,5 @@ struct utf8data_table utf8_data_table = { .utf8data = utf8data, }; EXPORT_SYMBOL_GPL(utf8_data_table); +MODULE_DESCRIPTION("UTF8 data table"); MODULE_LICENSE("GPL v2"); diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index cb035da3f990b0d9db135678c7a09d70040cde58..fb05f44f6c754ac214145d8690a7e42ebc549ece 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t; * And, of course, we also need to take into account the dquot log format item * used to describe each dquot. */ -#define XFS_DQUOT_LOGRES(mp) \ +#define XFS_DQUOT_LOGRES \ ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 3dc8f785bf2947dec439cd5c9009772ab3ebef24..45aaf169806aa5ec4b23cf17d7d41406d8be4a1c 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -338,11 +338,11 @@ xfs_calc_write_reservation( blksz); t1 += adj; t3 += adj; - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 1); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -410,11 +410,11 @@ xfs_calc_itruncate_reservation( xfs_refcountbt_block_count(mp, 4), blksz); - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 2); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -466,7 +466,7 @@ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -577,7 +577,7 @@ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -641,7 +641,7 @@ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -729,7 +729,7 @@ xfs_calc_icreate_reservation( struct xfs_mount *mp) { struct xfs_trans_resv *resp = M_RES(mp); - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; unsigned int t1, t2, t3 = 0; t1 = xfs_calc_icreate_resv_alloc(mp); @@ -747,7 +747,7 @@ STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { - uint res = XFS_DQUOT_LOGRES(mp); + uint res = XFS_DQUOT_LOGRES; res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); @@ -829,7 +829,7 @@ STATIC uint xfs_calc_ifree_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + @@ -846,7 +846,7 @@ STATIC uint xfs_calc_ichange_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); @@ -955,7 +955,7 @@ STATIC uint xfs_calc_addafork_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + @@ -1003,7 +1003,7 @@ STATIC uint xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); @@ -1043,7 +1043,7 @@ STATIC uint xfs_calc_attrrm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + max((xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)) + diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0dbc484b182f9813adc26e3efcf3cd1d6a73b652..2f98d90d7fd66d099c9f8f78725e77700c540a78 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -696,7 +696,7 @@ xrep_agfl_init_header( * step. */ xagb_bitmap_init(&af.used_extents); - af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), + af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp); xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); if (error) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 24a15bf784f11b06ab0f46dae93d84099b14e1c8..5ab2ac53c920020816b39d93562fa819454a72b2 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -938,7 +938,13 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) + /* + * "attr" means that an attr fork was created at some point in + * the life of this filesystem. "attr2" means that inodes have + * variable-sized data/attr fork areas. Hence we only check + * attr here. + */ + if (!xfs_has_attr(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 733c410a22797a91c3d2395677332667dc9496e3..91e7b51ce0680bc891206255afd4fe20f5f78739 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -799,7 +799,7 @@ xchk_parent_pptr( } if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out_pp; + goto out_names; /* * Complain if the number of parent pointers doesn't match the link diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 92ef4cdc486e901ab786bc538faf061c350270d5..c886d5d0eb021a7683a3f84d2819be052697c305 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -959,18 +959,16 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; __entry->ino = file_inode(xf->file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(xf->file, pathname, sizeof(pathname) - 1); + path = file_path(xf->file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("xfino 0x%lx path '%s'", __entry->ino, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 5c947e5ce8b8811d659c9b964f0d2e74fc2bf7c1..7db38630487514d553facdc3421e37349c561e3d 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -139,7 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ - sbp->value = &sfe->nameval[sfe->namelen], + sbp->value = &sfe->nameval[sfe->namelen]; sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4e933db75b12b94ad4b76113ec226fefbe819f1c..6b13666d4e96355aba7de80b5a1718df54dcd518 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -483,6 +483,17 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; + + /* + * If S_DAX is enabled on this file, we can only switch the + * device if both support fsdax. We can't update S_DAX because + * there might be other threads walking down the access paths. + */ + if (IS_DAX(VFS_I(ip)) && + (mp->m_ddev_targp->bt_daxdev == NULL || + (mp->m_rtdev_targp && + mp->m_rtdev_targp->bt_daxdev == NULL))) + return -EINVAL; } if (rtflag) { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5646d300b2861e1f6de24594d17dabda371a80b2..180ce697305a92ea829f7e55ac5af55ce7ed8709 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4715,20 +4715,18 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(file, pathname, sizeof(pathname) - 1); + path = file_path(file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("dev %d:%d xmino 0x%lx path '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fafcc9f3dbe4447525bb0fc964d27089a791331..8ede9d099d1fea6b6399b5a0617246baee057c07 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -644,7 +644,12 @@ xfsaild( set_freezable(); while (1) { - if (tout) + /* + * Long waits of 50ms or more occur when we've run out of items + * to push, so we only want uninterruptible state if we're + * actually blocked on something. + */ + if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ab3d22f662f280cd08bcc463599c2286d2b351ba..eaf849260bd672fc1ce02040902b4fff9c5676ce 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -110,7 +110,24 @@ xfs_attr_change( args->whichfork = XFS_ATTR_FORK; xfs_attr_sethash(args); - return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT); + /* + * Some xattrs must be resistant to allocation failure at ENOSPC, e.g. + * creating an inode with ACLs or security attributes requires the + * allocation of the xattr holding that information to succeed. Hence + * we allow xattrs in the VFS TRUSTED, SYSTEM, POSIX_ACL and SECURITY + * (LSM xattr) namespaces to dip into the reserve block pool to allow + * manipulation of these xattrs when at ENOSPC. These VFS xattr + * namespaces translate to the XFS_ATTR_ROOT and XFS_ATTR_SECURE on-disk + * namespaces. + * + * For most of these cases, these special xattrs will fit in the inode + * itself and so consume no extra space or only require temporary extra + * space while an overwrite is being made. Hence the use of the reserved + * pool is largely to avoid the worst case reservation from preventing + * the xattr from being created at ENOSPC. + */ + return xfs_attr_set(args, op, + args->attr_filter & (XFS_ATTR_ROOT | XFS_ATTR_SECURE)); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d5274c952d350e5c93fe0acf14d9e5e8..9f1c1d225e32c62a7f728a2711d5ff9045e86bcd 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,12 +660,9 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ad6afc5c49183a8043536fe7129901d3fa9c5717..1ae44793132a8d9a6349aff7dd51299f50cf820e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -911,13 +911,12 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) -#define RUNTIME_NAME(t,x) runtime_##t##_##x +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } -#define RUNTIME_CONST(t,x) \ - . = ALIGN(8); \ - RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ - *(RUNTIME_NAME(t,x)); \ - } +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce554831bb3fa43b79a8a17aec91562..9689a7c5dd36b25d7ddefa0ec9fd4de210349829 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); diff --git a/include/dt-bindings/i3c/i3c.h b/include/dt-bindings/i3c/i3c.h new file mode 100644 index 0000000000000000000000000000000000000000..373439218bba6ec7de0ce86d5faaa21a7c2cb690 --- /dev/null +++ b/include/dt-bindings/i3c/i3c.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright 2024 NXP + */ + +#ifndef _DT_BINDINGS_I3C_I3C_H +#define _DT_BINDINGS_I3C_I3C_H + +#define I2C_FM (1 << 4) +#define I2C_FM_PLUS (0 << 4) + +#define I2C_FILTER (0 << 5) +#define I2C_NO_FILTER_HIGH_FREQUENCY (1 << 5) +#define I2C_NO_FILTER_LOW_FREQUENCY (2 << 5) + +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f0b95c76c707fbcf964dc5871929c5e55874d274..0687a442fec7d571f57a025492ad9345390ae63d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,6 +259,12 @@ static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif +#ifdef CONFIG_RISCV +void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa); +#else +static inline void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) { } +#endif + #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01b3221b3da74970ba3c445eaa7fbd..d3b66d77df7a3c029e6e2c8b50abcf531eb88dfb 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } +static inline void bitmap_copy_and_extend(unsigned long *to, + const unsigned long *from, + unsigned int count, unsigned int size) +{ + unsigned int copy = BITS_TO_LONGS(count); + + memcpy(to, from, copy * sizeof(long)); + if (count % BITS_PER_LONG) + to[copy - 1] &= BITMAP_LAST_WORD_MASK(count); + memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long)); +} + /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 46d4bdc634c0bbaa7da315529d43937abaf205c2..ba35bbf07798b9548ddf28e9efb97eadc3a9a016 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,12 +47,17 @@ extern unsigned long __sw_hweight64(__u64 w); __builtin_constant_p(*(const unsigned long *)(addr))) ? \ const##op(nr, addr) : op(nr, addr)) +/* + * The following macros are non-atomic versions of their non-underscored + * counterparts. + */ #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) #define __change_bit(nr, addr) bitop(___change_bit, nr, addr) #define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) + #define test_bit(nr, addr) bitop(_test_bit, nr, addr) #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 634a302a39e39d66ef3406ba4c104992928ae6cc..279b4a6406442c77854bafa824c0c7eb03000c19 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -874,8 +874,8 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? - prog->aux->dst_prog->type : prog->type; + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? + prog->aux->saved_dst_prog_type : prog->type; } static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2594553bb30b10d14dda5f634398b8596e6bbb7e..2df665fa2964d36b96cf4d11ca7029870c031ca2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -296,6 +296,15 @@ static inline void *offset_to_ptr(const int *off) #define is_signed_type(type) (((type)(-1)) < (__force type)1) #define is_unsigned_type(type) (!is_signed_type(type)) +/* + * Useful shorthand for "is this condition known at compile-time?" + * + * Note that the condition may involve non-constant values, + * but the compiler may know enough about the details of the + * values to determine that the condition is statically true. + */ +#define statically_true(x) (__builtin_constant_p(x) && (x)) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 89f5c34ce4df916302a9d391ec32b5d66bcec3c5..9316c39260e0e43d92ef0cbbf9f44dc9c9d18bee 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,7 +100,6 @@ enum cpuhp_state { CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, - CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, @@ -122,6 +121,7 @@ enum cpuhp_state { CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, + CPUHP_TMIGR_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, + CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 099e8b32dd68a45aed4bd3a3b8697a62d769c47e..53158de44b837854d3b2d047beebaf632f390b54 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1033,53 +1033,17 @@ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); -static inline void -set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &__cpu_possible_mask); - else - cpumask_clear_cpu(cpu, &__cpu_possible_mask); -} +#define assign_cpu(cpu, mask, val) \ + assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) -static inline void -set_cpu_enabled(unsigned int cpu, bool can_be_onlined) -{ - if (can_be_onlined) - cpumask_set_cpu(cpu, &__cpu_enabled_mask); - else - cpumask_clear_cpu(cpu, &__cpu_enabled_mask); -} - -static inline void -set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &__cpu_present_mask); - else - cpumask_clear_cpu(cpu, &__cpu_present_mask); -} +#define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) +#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) +#define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) +#define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) +#define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) void set_cpu_online(unsigned int cpu, bool online); -static inline void -set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &__cpu_active_mask); - else - cpumask_clear_cpu(cpu, &__cpu_active_mask); -} - -static inline void -set_cpu_dying(unsigned int cpu, bool dying) -{ - if (dying) - cpumask_set_cpu(cpu, &__cpu_dying_mask); - else - cpumask_clear_cpu(cpu, &__cpu_dying_mask); -} - /** * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 60b25020281ff38c6425a8e733b8c2f183c319ac..0bea1afbd747c4937b15703b581c569e7fa45ae4 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -21,6 +21,21 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; +struct cxl_event_media_hdr { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + /* + * The meaning of Validity Flags from bit 2 is + * different across DRAM and General Media records + */ + u8 validity_flags[2]; + u8 channel; + u8 rank; +} __packed; + #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 struct cxl_event_generic { struct cxl_event_record_hdr hdr; @@ -33,14 +48,7 @@ struct cxl_event_generic { */ #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 struct cxl_event_gen_media { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 device[3]; u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; u8 reserved[46]; @@ -52,14 +60,7 @@ struct cxl_event_gen_media { */ #define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 struct cxl_event_dram { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 nibble_mask[3]; u8 bank_group; u8 bank; @@ -95,21 +96,13 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; -/* - * General Media or DRAM Event Common Fields - * - provides common access to phys_addr - */ -struct cxl_event_common { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; -} __packed; - union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; - struct cxl_event_common common; + /* dram & gen_media event header */ + struct cxl_event_media_hdr media_hdr; } __packed; /* diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17915e9c682acf01e4b80498ed7838..989c94eddb2b4ffd45c32aa0b440a6a2016f84b2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739f3840fecdfcce122a5984b81e8cd..59b146a14dcad02ca14091189c228bcbab229437 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -110,7 +110,7 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), * * f = dentry_open(&path, O_RDONLY, current_cred()); * if (IS_ERR(f)) - * return PTR_ERR(fd); + * return PTR_ERR(f); * * fd_install(fd, f); * return take_fd(fd); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 00abe0e5d602bc8d51545aae2bc3c60c6f0c9f4f..1cca14cf56527be1d1e4f45d8ccfae8a86c66a7a 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -462,9 +462,8 @@ struct fw_iso_packet { /* rx: Sync bit, wait for matching sy */ u32 tag:2; /* tx: Tag in packet header */ u32 sy:4; /* tx: Sy in packet header */ - u32 header_length:8; /* Length of immediate header */ - /* tx: Top of 1394 isoch. data_block */ - u32 header[] __counted_by(header_length); + u32 header_length:8; /* Size of immediate header */ + u32 header[]; /* tx: Top of 1394 isoch. data_block */ }; #define FW_ISO_CONTEXT_TRANSMIT 0 diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b52988a7131493cd56fa8ea640ad9..fb0426f349fc5c93f13e8165418448ea560d8db1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,6 +2392,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2415,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cff002be83eb9167b254841715fbea5ae0f6ab7f..e25d9ebfdf89aabdf86dd44bd7a187903290894f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -74,14 +74,20 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) /* - * Mask of all large folio orders supported for file THP. + * Mask of all large folio orders supported for file THP. Folios in a DAX + * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to + * it. */ -#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) +#define THP_ORDERS_ALL_FILE_DAX \ + (BIT(PMD_ORDER) | BIT(PUD_ORDER)) +#define THP_ORDERS_ALL_FILE_DEFAULT \ + ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) /* * Mask of all large folio orders supported for THP. */ -#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) +#define THP_ORDERS_ALL \ + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT) #define TVA_SMAPS (1 << 0) /* Will be used for procfs */ #define TVA_IN_PF (1 << 1) /* Page fault handler */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a01107dfd7f055a3f02897013d312e..45bf05ad5c53a1a9390bd1e8aeb9ff96dd848664 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 07e33bbc9256dd04ca9e60921c7663cd22845fe7..377def4972985cddb9ee6ee223b2cd3521346b02 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0ca27dd869561fd96c8f2a1470a0f8adae6d4d5e..074f632868d98e2570e109e952c827e92e4e3c89 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -33,6 +33,7 @@ enum { struct i3c_master_controller; struct i3c_bus; struct i3c_device; +extern const struct bus_type i3c_bus_type; /** * struct i3c_i2c_dev_desc - Common part of the I3C/I2C device descriptor diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3bb6198d152346595b8e6a1e7a9547caee08f60c..3315005df117b8b1c5ddb1fab39db6c77bd7af81 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -404,7 +404,7 @@ struct io_ring_ctx { spinlock_t napi_lock; /* napi_list lock */ /* napi busy poll default timeout */ - unsigned int napi_busy_poll_to; + ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; bool napi_enabled; @@ -461,7 +461,6 @@ enum { REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, REQ_F_POLL_NO_LAZY_BIT, - REQ_F_CANCEL_SEQ_BIT, REQ_F_CAN_POLL_BIT, REQ_F_BL_EMPTY_BIT, REQ_F_BL_NO_RECYCLE_BIT, @@ -536,8 +535,6 @@ enum { REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), - /* cancel sequence is set and valid */ - REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), /* file is pollable */ REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), /* buffer list was empty after selection of buffer */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311850173a1871c0c821fa15c7f01bb..04cbdae0052eb23cf6c080d2a01b3fd028f15e48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a755a24057985389f5c3019cfc990d..b23c6d48392f7c4e37252bc19de9aacfe6759b5f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,13 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -2414,7 +2421,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs); + unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, @@ -2445,11 +2452,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * @@ -2476,8 +2483,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7e2eb091049a0814cf02f7acc14591b5f0f268d9..0e5bf25d324f0ca3346c4f2065c56e9c5626f4f2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -109,6 +109,7 @@ struct mem_cgroup_per_node { /* Fields which get updated often at the end. */ struct lruvec lruvec; + CACHELINE_PADDING(_pad2_); unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter; }; diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 2ec559284a9f6c3b170e8f69dcb557a3097c7e44..98008dd92153db10c672155bca93201ffabee994 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -26,56 +26,100 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* is_signed_type() isn't a constexpr for pointer types */ -#define __is_signed(x) \ - __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ - is_signed_type(typeof(x)), 0) +/* + * __sign_use for integer expressions: + * bit #0 set if ok for unsigned comparisons + * bit #1 set if ok for signed comparisons + * + * In particular, statically non-negative signed integer + * expressions are ok for both. + * + * NOTE! Unsigned types smaller than 'int' are implicitly + * converted to 'int' in expressions, and are accepted for + * signed conversions for now. This is debatable. + * + * Note that 'x' is the original expression, and 'ux' is + * the unique variable that contains the value. + * + * We use 'ux' for pure type checking, and 'x' for when + * we need to look at the value (but without evaluating + * it for side effects! Careful to only ever evaluate it + * with sizeof() or __builtin_constant_p() etc). + * + * Pointers end up being checked by the normal C type + * rules at the actual comparison, and these expressions + * only need to be careful to not cause warnings for + * pointer use. + */ +#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) +#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) +#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ + __signed_type_use(x,ux):__unsigned_type_use(x,ux)) -/* True for a non-negative signed int constant */ -#define __is_noneg_int(x) \ - (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) +/* + * To avoid warnings about casting pointers to integers + * of different sizes, we need that special sign type. + * + * On 64-bit we can just always use 'long', since any + * integer or pointer type can just be cast to that. + * + * This does not work for 128-bit signed integers since + * the cast would truncate them, but we do not use s128 + * types in the kernel (we do use 'u128', but they will + * be handled by the !is_signed_type() case). + * + * NOTE! The cast is there only to avoid any warnings + * from when values that aren't signed integer types. + */ +#ifdef CONFIG_64BIT + #define __signed_type(ux) long +#else + #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#endif +#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) + +#define __types_ok(x,y,ux,uy) \ + (__sign_use(x,ux) & __sign_use(y,uy)) -#define __types_ok(x, y) \ - (__is_signed(x) == __is_signed(y) || \ - __is_signed((x) + 0) == __is_signed((y) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok3(x,y,z,ux,uy,uz) \ + (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) #define __cmp_op_min < #define __cmp_op_max > #define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) -#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - static_assert(__types_ok(x, y), \ - #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ - __cmp(op, unique_x, unique_y); }) +#define __cmp_once_unique(op, type, x, y, ux, uy) \ + ({ type ux = (x); type uy = (y); __cmp(op, ux, uy); }) + +#define __cmp_once(op, type, x, y) \ + __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) + +#define __careful_cmp_once(op, x, y, ux, uy) ({ \ + __auto_type ux = (x); __auto_type uy = (y); \ + BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + #op"("#x", "#y") signedness error"); \ + __cmp(op, ux, uy); }) -#define __careful_cmp(op, x, y) \ - __builtin_choose_expr(__is_constexpr((x) - (y)), \ - __cmp(op, x, y), \ - __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) +#define __careful_cmp(op, x, y) \ + __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ +#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ + __auto_type uval = (val); \ + __auto_type ulo = (lo); \ + __auto_type uhi = (hi); \ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ - __clamp(unique_val, unique_lo, unique_hi); }) + BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ + __clamp(uval, ulo, uhi); }) -#define __careful_clamp(val, lo, hi) ({ \ - __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ - __clamp(val, lo, hi), \ - __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ - __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) +#define __careful_clamp(val, lo, hi) \ + __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) /** * min - return minimum of two values of the same or compatible types @@ -108,13 +152,20 @@ #define umax(x, y) \ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) +#define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ + __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ + BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + #op"3("#x", "#y", "#z") signedness error"); \ + __cmp(op, ux, __cmp(op, uy, uz)); }) + /** * min3 - return minimum of three values * @x: first value * @y: second value * @z: third value */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define min3(x, y, z) \ + __careful_op3(min, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * max3 - return maximum of three values @@ -122,7 +173,8 @@ * @y: second value * @z: third value */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) +#define max3(x, y, z) \ + __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero @@ -158,7 +210,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) +#define min_t(type, x, y) __cmp_once(min, type, x, y) /** * max_t - return maximum of two values, using the specified type @@ -166,7 +218,7 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) +#define max_t(type, x, y) __cmp_once(max, type, x, y) /* * Do not check the array parameter using __must_be_array(). @@ -270,4 +322,13 @@ static inline bool in_range32(u32 val, u32 start, u32 len) #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +/* + * Use these carefully: no type checking, and uses the arguments + * multiple times. Use for obvious constants only. + */ +#define MIN(a,b) __cmp(min,a,b) +#define MAX(a,b) __cmp(max,a,b) +#define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) +#define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) + #endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 4563b560ced7bc6e41fd3745357337f6e3c6c63e..6549d0979b28f93332db0457bfa70942ff2cf43a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} @@ -3137,21 +3148,7 @@ extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); /* Free the reserved page into the buddy system, so it gets managed. */ -static inline void free_reserved_page(struct page *page) -{ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - adjust_managed_page_count(page, 1); -} +void free_reserved_page(struct page *page); #define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a346d8d51eca6fc24d5ded4e0b682..1dc6248feb83246d39e85425ba49725b7ca9269e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d0288938cc2dc33579ae70bf462807340cf3a72..983816608f15d7b3c3dc0513bbcc0490f7c45c77 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,8 +73,6 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark - * write to cache on read */ }; /* @@ -269,7 +267,6 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c12a329dd4635e1ec4f5e9124a65aa408b639b58..7b2ae2e435447f987f4e91e32b0e7bb15026a873 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -485,6 +485,9 @@ enum { NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, NVME_ID_NS_NVM_GUARD_MASK = 0x3, + NVME_ID_NS_NVM_QPIF_SHIFT = 9, + NVME_ID_NS_NVM_QPIF_MASK = 0xf, + NVME_ID_NS_NVM_QPIFS = 1 << 3, }; static inline __u8 nvme_elbaf_sts(__u32 elbaf) @@ -497,6 +500,11 @@ static inline __u8 nvme_elbaf_guard_type(__u32 elbaf) return (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) & NVME_ID_NS_NVM_GUARD_MASK; } +static inline __u8 nvme_elbaf_qualified_guard_type(__u32 elbaf) +{ + return (elbaf >> NVME_ID_NS_NVM_QPIF_SHIFT) & NVME_ID_NS_NVM_QPIF_MASK; +} + struct nvme_id_ctrl_nvm { __u8 vsl; __u8 wzsl; @@ -576,6 +584,7 @@ enum { NVME_NVM_NS_16B_GUARD = 0, NVME_NVM_NS_32B_GUARD = 1, NVME_NVM_NS_64B_GUARD = 2, + NVME_NVM_NS_QTYPE_GUARD = 3, }; static inline __u8 nvme_lbaf_index(__u8 flbas) diff --git a/include/linux/of.h b/include/linux/of.h index 13cf7a43b47338aa8c18cac62491b1d8c83eda5a..85b60ac9eec50bb202aa774cab273e1d947e394d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -430,11 +430,9 @@ extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) /* - * struct property *prop; - * const __be32 *p; * u32 u; * - * of_property_for_each_u32(np, "propname", prop, p, u) + * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, @@ -1431,11 +1429,12 @@ static inline int of_property_read_s32(const struct device_node *np, err == 0; \ err = of_phandle_iterator_next(it)) -#define of_property_for_each_u32(np, propname, prop, p, u) \ - for (prop = of_find_property(np, propname, NULL), \ - p = of_prop_next_u32(prop, NULL, &u); \ - p; \ - p = of_prop_next_u32(prop, p, &u)) +#define of_property_for_each_u32(np, propname, u) \ + for (struct {struct property *prop; const __be32 *item; } _it = \ + {of_find_property(np, propname, NULL), \ + of_prop_next_u32(_it.prop, NULL, &u)}; \ + _it.item; \ + _it.item = of_prop_next_u32(_it.prop, _it.item, &u)) #define of_property_for_each_string(np, propname, prop, s) \ for (prop = of_find_property(np, propname, NULL), \ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 547e82cdc89a443bd9ef30749f832f7e7c5db348..fc6b9c87cb0a8c9f44326d4739b9bff98c5e0d65 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,13 +41,13 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b0367c00ca940cb9aa95e0e95de0d..54d90b6c5f47bd6978654b3f61261943a37e2052 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d93d9c80ba46b708763dad3c15752e..207f0c83c8e9750ac7050cc488f1618a69c4fd5c 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} diff --git a/include/linux/profile.h b/include/linux/profile.h index 2fb487f61d12abf8e8a881467cf598e646995fa0..3f53cdb0c27c1a2be13d63199f2949a2033f174c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -10,7 +10,6 @@ #define CPU_PROFILING 1 #define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d3cabd34e40f439eae513e4cdc2a5..35f039ecb2725618ca098e3515c6e19e2aece3ee 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471ede371061cded9c0c406b7653501d..fd35d4ec12e1d5a7b82e09c1a932418491c41630 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -193,7 +193,6 @@ void ring_buffer_set_clock(struct trace_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); struct buffer_data_read_page; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a039060238312b77df65889ad0af7e6..d47d5f14ff992f74bd2962dbca9d89d8256109b3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474bd4d1aad8c14a544dc74c8df3b0a..b86ddca46b9e81de578aa5dbc9eecccb993117e1 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9435185c10ef72305078713e2ddaa57329064b3b..42bedcddd5113e96920a1c5f6baab88645242ef9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -680,7 +680,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; - atomic_t ref; /* ref count for opened files */ + refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 3064314f48329cac729b9b4e29fc62072ec2df3b..d8e4105a2f21b1d5bc4ad50b8246a01e483d4b79 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -138,13 +139,26 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) return raw_copy_to_user(to, from, n); } -#ifdef INLINE_COPY_FROM_USER +/* + * Architectures that #define INLINE_COPY_TO_USER use this function + * directly in the normal copy_to/from_user(), the other ones go + * through an extern _copy_to/from_user(), which expands the same code + * here. + * + * Rust code always uses the extern definition. + */ static inline __must_check unsigned long -_copy_from_user(void *to, const void __user *from, unsigned long n) +_inline_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); @@ -153,14 +167,11 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) memset(to + (n - res), 0, res); return res; } -#else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); -#endif -#ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long -_copy_to_user(void __user *to, const void *from, unsigned long n) +_inline_copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) @@ -171,25 +182,32 @@ _copy_to_user(void __user *to, const void *from, unsigned long n) } return n; } -#else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); -#endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (check_copy_size(to, n, false)) - n = _copy_from_user(to, from, n); - return n; + if (!check_copy_size(to, n, false)) + return n; +#ifdef INLINE_COPY_FROM_USER + return _inline_copy_from_user(to, from, n); +#else + return _copy_from_user(to, from, n); +#endif } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (check_copy_size(from, n, true)) - n = _copy_to_user(to, from, n); - return n; + if (!check_copy_size(from, n, true)) + return n; + +#ifdef INLINE_COPY_TO_USER + return _inline_copy_to_user(to, from, n); +#else + return _copy_to_user(to, from, n); +#endif } #ifndef copy_mc_to_kernel diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ecc5cb7b8c91ed6f7b663d1407d6b2927e40953f..4b16844c6bc29a3c29f4236b872b3f4f5291efe5 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,6 +10,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -109,6 +110,8 @@ struct virtio_admin_cmd { __le64 group_member_id; struct scatterlist *data_sg; struct scatterlist *result_sg; + struct completion completion; + int ret; }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ab4b9a3fef6ba12fecf83cc89f120f4662f2ab9c..169c7d367facb36dcabf9596068580ea8b8516c7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -104,8 +104,6 @@ struct virtqueue_info { * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. - * @create_avq: create admin virtqueue resource. - * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -133,8 +131,6 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); - int (*create_avq)(struct virtio_device *vdev); - void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c32d51dae4264bf97b498d5f618b03..6c395a2600e8d18ec6ef73287c24b53d2f98515e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ retry: if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd179420363cb8ec510e4691ee32b3d8d93fac..9eb77c9007e6231bdc8753f944455ccf26d39c87 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,13 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ @@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5cec874b8b94088020b1199c8b8c0..24d970f7a4fa22d289e07c1bbaa0dd8bc7b972bb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -230,8 +230,12 @@ struct vsock_tap { int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); +int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 6439fd8b437b66ac2f704fe11cbb25c86fd525b6..7caa334f48880e500e0f889de9561ae724e5ddc7 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -275,6 +275,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a6aa112e57416ce04b4e9ada51b39b52a2e8de68..a51acefa785f72980f46c2deda5b76dbb4751fab 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -277,6 +277,11 @@ static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_ba return 0; } +static inline bool cs35l56_is_otp_register(unsigned int reg) +{ + return (reg >> 16) == 3; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index ceca69b46a821416ad095cb5e36a1e16a77eb048..bf2e381cd124b657af5111257439dbc0db1d9ed8 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -462,6 +462,11 @@ int snd_soc_component_force_enable_pin_unlocked( const char *pin); /* component controls */ +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl); +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 99c41bfc7827e4e5d9bf9d9a7fe548481bebbf5c..00fd4d449ff35c0e2a3eb34b08ee27bd112f3009 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -16,11 +16,11 @@ #define __TAS2781_TLV_H__ static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); -static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); -static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); +static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); /* pow(10, db/20) * pow(2,30) */ -static const unsigned char tas2563_dvc_table[][4] = { +static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index 28c364c63245d035004d5b6210a4181dbfc135f6..d099ae27f8491afbbcc65dea5f22149d448b7a92 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -13,6 +13,7 @@ struct ump_cvt_to_ump_bank { unsigned char cc_nrpn_msb, cc_nrpn_lsb; unsigned char cc_data_msb, cc_data_lsb; unsigned char cc_bank_msb, cc_bank_lsb; + bool cc_data_msb_set, cc_data_lsb_set; }; /* context for converting from MIDI1 byte stream to UMP packet */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 246c0fbd582ee308bf35caaff97b796631ff86a3..0a523023bdcc38e3c20a0d28a58a836381cb34b0 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 09e72215b9f9bb53ec363d7690e9b87a09d172cb..085b749cdd97e6598dd1c66631f45eda3b73ee14 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index da23484268dfcfd6deeee9b9c40447b034d775fc..606b4a0f92dae2763ec9f389aa3ae96ced962f81 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -51,6 +51,7 @@ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ @@ -145,6 +146,7 @@ EM(netfs_folio_trace_clear_g, "clear-g") \ EM(netfs_folio_trace_clear_s, "clear-s") \ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h index 79f19e76a80bf1ef0fb4add82919296883bd9dc0..47db5eaf2f9ab75669abb54162d4e6ea55f0ee0d 100644 --- a/include/trace/events/timer_migration.h +++ b/include/trace/events/timer_migration.h @@ -43,7 +43,7 @@ TRACE_EVENT(tmigr_connect_child_parent, __field( unsigned int, lvl ) __field( unsigned int, numa_node ) __field( unsigned int, num_children ) - __field( u32, childmask ) + __field( u32, groupmask ) ), TP_fast_assign( @@ -52,11 +52,11 @@ TRACE_EVENT(tmigr_connect_child_parent, __entry->lvl = child->parent->level; __entry->numa_node = child->parent->numa_node; __entry->num_children = child->parent->num_children; - __entry->childmask = child->childmask; + __entry->groupmask = child->groupmask; ), - TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", - __entry->child, __entry->childmask, __entry->parent, + TP_printk("group=%p groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->child, __entry->groupmask, __entry->parent, __entry->lvl, __entry->numa_node, __entry->num_children) ); @@ -72,7 +72,7 @@ TRACE_EVENT(tmigr_connect_cpu_parent, __field( unsigned int, lvl ) __field( unsigned int, numa_node ) __field( unsigned int, num_children ) - __field( u32, childmask ) + __field( u32, groupmask ) ), TP_fast_assign( @@ -81,11 +81,11 @@ TRACE_EVENT(tmigr_connect_cpu_parent, __entry->lvl = tmc->tmgroup->level; __entry->numa_node = tmc->tmgroup->numa_node; __entry->num_children = tmc->tmgroup->num_children; - __entry->childmask = tmc->childmask; + __entry->groupmask = tmc->groupmask; ), - TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", - __entry->cpu, __entry->childmask, __entry->parent, + TP_printk("cpu=%d groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->cpu, __entry->groupmask, __entry->parent, __entry->lvl, __entry->numa_node, __entry->num_children) ); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 985a262d0f9eee32cfb0b55b8a331c379b015403..5bf6148cac2b93e36f362236545e2e6eb473d5ff 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,11 +841,8 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) -#define __NR_uretprobe 463 -__SYSCALL(__NR_uretprobe, sys_uretprobe) - #undef __NR_syscalls -#define __NR_syscalls 464 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac42d9287ea948dedb5b81325cfbcb..adc2524fd8e3dca4be2a5967eee3bd1bcebacd9a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6a60e57e46f7c6bff650dc7d3c637..5fad3d0fcd70788de7c7cb67d4175c43574f0191 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -16,7 +17,7 @@ /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) /* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 2289b7c76c59aa946abd4375322fdb45ec456008..832c15d9155bdbb95827c3e8214da066efe0f55e 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -51,6 +51,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc5294e926d73b40d201f039474b28..f33d914d8f4699eddef9f9910ec2b30440c26e83 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a43b14276bc33dd5ca04dc94249578a3177a9b6f..cac0cdb9a916c733f6dd8f6622b3c57bccd3abe3 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1109,6 +1109,7 @@ struct ufs_hba { bool ext_iid_sup; bool scsi_host_added; bool mcq_sup; + bool lsdb_sup; bool mcq_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65facdd67a62cfa32fbf48f8a7dbd7..9917c7743d808c49a67a5008793a44273862f176 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -77,6 +77,7 @@ enum { MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, MASK_CRYPTO_SUPPORT = 0x10000000, + MASK_LSDB_SUPPORT = 0x20000000, MASK_MCQ_SUPPORT = 0x40000000, }; diff --git a/init/Kconfig b/init/Kconfig index c41260ffe99a928ba80a9db8ef2a6a419267f095..5783a0b8751726ad97189d786dca9ee43cb8cd3c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1902,6 +1902,7 @@ config RUST depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT + depends on !SHADOW_CALL_STACK depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE help Enables Rust support in the kernel. @@ -1919,12 +1920,15 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default "$(shell,$(RUSTC) --version 2>/dev/null)" config BINDGEN_VERSION_TEXT string depends on RUST - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n) + # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 + # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when + # the minimum version is upgraded past that (0.69.1 already fixed the issue). + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8e6faa942a6f9de66b2a859771606d5855a68497..3942db160f18ef3bc09c6f666724263a4117635d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1849,7 +1849,7 @@ fail: } while (1); /* avoid locking problems by failing it from a clean context */ - if (ret < 0) + if (ret) io_req_task_queue_fail(req, ret); } @@ -2416,12 +2416,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (uts) { struct timespec64 ts; + ktime_t dt; if (get_timespec64(&ts, uts)) return -EFAULT; - iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); - io_napi_adjust_timeout(ctx, &iowq, &ts); + dt = timespec64_to_ktime(ts); + iowq.timeout = ktime_add(dt, ktime_get()); + io_napi_adjust_timeout(ctx, &iowq, dt); } if (sig) { @@ -3031,8 +3033,11 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) bool loop = false; io_uring_drop_tctx_refs(current); + if (!tctx_inflight(tctx, !cancel_all)) + break; + /* read completions before cancelations */ - inflight = tctx_inflight(tctx, !cancel_all); + inflight = tctx_inflight(tctx, false); if (!inflight) break; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e1ce908f067992f21db86c45753aaafccf7e6be0..c2acf6180845db760c3ae4917ff8227302139e95 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -43,7 +43,7 @@ struct io_wait_queue { ktime_t timeout; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int napi_busy_poll_to; + ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; #endif }; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 29fa9285a33d9e1b4c3f2f1ff80e9858de318f4e..7fd9badcfaf811475389a5ede8f0aaa3526c5a50 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -110,10 +110,10 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx) if (spin_trylock(&ctx->msg_lock)) { req = io_alloc_cache_get(&ctx->msg_cache); spin_unlock(&ctx->msg_lock); + if (req) + return req; } - if (req) - return req; - return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN); + return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); } static int io_msg_data_remote(struct io_kiocb *req) diff --git a/io_uring/napi.c b/io_uring/napi.c index 762254a7ff3ffc0c9ee61897f7fac4a06ed5ca4b..1de1d4d629254c13af841d96c6019c0545ab53f8 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -26,13 +26,18 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; - e->timeout = jiffies + NAPI_TIMEOUT; return e; } return NULL; } +static inline ktime_t net_to_ktime(unsigned long t) +{ + /* napi approximating usecs, reverse busy_loop_current_time */ + return ns_to_ktime(t << 10); +} + void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) { struct hlist_head *hash_list; @@ -102,14 +107,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) __io_napi_remove_stale(ctx); } -static inline bool io_napi_busy_loop_timeout(unsigned long start_time, - unsigned long bp_usec) +static inline bool io_napi_busy_loop_timeout(ktime_t start_time, + ktime_t bp) { - if (bp_usec) { - unsigned long end_time = start_time + bp_usec; - unsigned long now = busy_loop_current_time(); + if (bp) { + ktime_t end_time = ktime_add(start_time, bp); + ktime_t now = net_to_ktime(busy_loop_current_time()); - return time_after(now, end_time); + return ktime_after(now, end_time); } return true; @@ -124,7 +129,8 @@ static bool io_napi_busy_loop_should_end(void *data, return true; if (io_should_wake(iowq) || io_has_work(iowq->ctx)) return true; - if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) + if (io_napi_busy_loop_timeout(net_to_ktime(start_time), + iowq->napi_busy_poll_dt)) return true; return false; @@ -181,10 +187,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, */ void io_napi_init(struct io_ring_ctx *ctx) { + u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC; + INIT_LIST_HEAD(&ctx->napi_list); spin_lock_init(&ctx->napi_lock); ctx->napi_prefer_busy_poll = false; - ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll); + ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); } /* @@ -196,7 +204,6 @@ void io_napi_init(struct io_ring_ctx *ctx) void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - LIST_HEAD(napi_list); unsigned int i; spin_lock(&ctx->napi_lock); @@ -217,11 +224,13 @@ void io_napi_free(struct io_ring_ctx *ctx) int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { - .busy_poll_to = ctx->napi_busy_poll_to, + .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; struct io_uring_napi napi; + if (ctx->flags & IORING_SETUP_IOPOLL) + return -EINVAL; if (copy_from_user(&napi, arg, sizeof(napi))) return -EFAULT; if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) @@ -230,7 +239,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) if (copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; - WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to); + WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); WRITE_ONCE(ctx->napi_enabled, true); return 0; @@ -247,14 +256,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { - .busy_poll_to = ctx->napi_busy_poll_to, + .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; if (arg && copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; - WRITE_ONCE(ctx->napi_busy_poll_to, 0); + WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); WRITE_ONCE(ctx->napi_enabled, false); return 0; @@ -271,25 +280,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) * the NAPI timeout accordingly. */ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - struct timespec64 *ts) + ktime_t to_wait) { - unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); - - if (ts) { - struct timespec64 poll_to_ts; - - poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to); - if (timespec64_compare(ts, &poll_to_ts) < 0) { - s64 poll_to_ns = timespec64_to_ns(ts); - if (poll_to_ns > 0) { - u64 val = poll_to_ns + 999; - do_div(val, 1000); - poll_to = val; - } - } - } + ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); + + if (to_wait) + poll_dt = min(poll_dt, to_wait); - iowq->napi_busy_poll_to = poll_to; + iowq->napi_busy_poll_dt = poll_dt; } /* @@ -303,7 +301,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); - if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) + if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } @@ -315,10 +313,9 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) */ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { - LIST_HEAD(napi_list); bool is_stale = false; - if (!READ_ONCE(ctx->napi_busy_poll_to)) + if (!READ_ONCE(ctx->napi_busy_poll_dt)) return 0; if (list_empty_careful(&ctx->napi_list)) return 0; diff --git a/io_uring/napi.h b/io_uring/napi.h index 6fc0393d0dbef799fa6eded131876d6c5c29625b..27b88c3eb42894eb5171592b03dd450471c7267d 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -18,7 +18,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock); void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, - struct io_wait_queue *iowq, struct timespec64 *ts); + struct io_wait_queue *iowq, ktime_t to_wait); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); @@ -29,11 +29,11 @@ static inline bool io_napi(struct io_ring_ctx *ctx) static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - struct timespec64 *ts) + ktime_t to_wait) { if (!io_napi(ctx)) return; - __io_napi_adjust_timeout(ctx, iowq, ts); + __io_napi_adjust_timeout(ctx, iowq, to_wait); } static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, @@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_busy_poll_to)) + if (!READ_ONCE(ctx->napi_enabled)) return; sock = sock_from_file(req->file); @@ -88,7 +88,7 @@ static inline void io_napi_add(struct io_kiocb *req) } static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - struct timespec64 *ts) + ktime_t to_wait) { } static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, diff --git a/io_uring/net.c b/io_uring/net.c index 594490a1389ba25e7ecc2b092298e4de70f247da..d08abcca89cc5b7d5c605f56c69e09f8d973f458 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -601,17 +601,18 @@ retry_bundle: .iovs = &kmsg->fast_iov, .max_len = INT_MAX, .nr_iovs = 1, - .mode = KBUF_MODE_EXPAND, }; if (kmsg->free_iov) { arg.nr_iovs = kmsg->free_iov_nr; arg.iovs = kmsg->free_iov; - arg.mode |= KBUF_MODE_FREE; + arg.mode = KBUF_MODE_FREE; } if (!(sr->flags & IORING_RECVSEND_BUNDLE)) arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; ret = io_buffers_select(req, &arg, issue_flags); if (unlikely(ret < 0)) @@ -623,6 +624,7 @@ retry_bundle: if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } @@ -1094,6 +1096,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } else { void __user *buf; diff --git a/io_uring/poll.c b/io_uring/poll.c index 0a8e02944689faf2aac44ac74fd82118021a2a97..1f63b60e85e7c07a20e86441b5bd0ea8d7dcf016 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -347,6 +347,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); + io_napi_add(req); return IOU_POLL_NO_ACTION; } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index b3722e5275e77e632529dedfb4285f3adfa07537..3b50dc9586d14fcb6a6b16e9fe29d029c86b15bc 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -44,7 +44,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(data_race(sqd->thread) == current); atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 1c9bf07499b197ba1fe59a5eb02df40a125395dc..9973876d91b0ef32010691e60b249988a76bdbe9 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -639,7 +639,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) static bool io_match_task(struct io_kiocb *head, struct task_struct *task, bool cancel_all) - __must_hold(&req->ctx->timeout_lock) + __must_hold(&head->ctx->timeout_lock) { struct io_kiocb *req; diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index a54163a8396861c1d41c1e78bda82a32d51bb840..8391c7c7c1ec48182487e54642c5dadd0f2b40a7 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -265,7 +265,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) req_set_fail(req); io_req_uring_cleanup(req, issue_flags); io_req_set_res(req, ret, 0); - return ret < 0 ? ret : IOU_OK; + return IOU_OK; } int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 543b8c9edac78737e149963ad6f12e7ba5f8a2a9..14853644645767f553dca524db846ade3494f8fb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17390,8 +17390,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; if (exact != NOT_EXACT && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false; if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026da7f8f27dcde18b526048ddc87a7..b1fd2a3db91a2ee71bc645882d09d09093459dd6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/** + * Check if the core a CPU belongs to is online + */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; - if (!cpu_smt_thread_allowed(cpu)) + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index d3b4cd12bdd134c1c5fbc676d3fc7dc2e4c4c3d3..64d44a52c011474960833f0a864b15d58550bfe8 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -423,7 +423,8 @@ retry: if (high && search_end == CRASH_ADDR_HIGH_MAX) { search_end = CRASH_ADDR_LOW_MAX; search_base = 0; - goto retry; + if (search_end != CRASH_ADDR_HIGH_MAX) + goto retry; } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index a6e3792b15f8a5ce0f870901de50b53a356df829..d570535342cb78c967e3a7c6e0cc2204e109ba0c 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -416,8 +416,11 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. + * + * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end + * up right back in the DMA debugging code, leading to a deadlock. */ -static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) diff --git a/kernel/events/core.c b/kernel/events/core.c index aa3450bdc2276cd47d749ac9631600f7963ecfa9..c973e3c11e036f6ed948431f95ebafab5011663f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9706,7 +9706,8 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); - if (event->prog && !bpf_overflow_handler(event, data, regs)) + if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT && + !bpf_overflow_handler(event, data, regs)) return ret; /* diff --git a/kernel/fork.c b/kernel/fork.c index cc760491f20127f44ab5db96ec7e858dcce77f54..18bdc87209d0574cfca1831a7979b240a6f57eda 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,11 +2053,24 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - bool thread = flags & PIDFD_THREAD; - - if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + if (!pid) return -EINVAL; + scoped_guard(rcu) { + struct task_struct *tsk; + + if (flags & PIDFD_THREAD) + tsk = pid_task(pid, PIDTYPE_PID); + else + tsk = pid_task(pid, PIDTYPE_TGID); + if (!tsk) + return -EINVAL; + + /* Don't create pidfds for kernel threads for now. */ + if (tsk->flags & PF_KTHREAD) + return -EINVAL; + } + return __pidfd_prepare(pid, flags, ret); } @@ -2403,6 +2416,12 @@ __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; + /* Don't create pidfds for kernel threads for now. */ + if (args->kthread) { + retval = -EINVAL; + goto bad_fork_free_pid; + } + /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 07e99c936ba5d86f8f3a291f6b7d95abcd32f03a..1dee88ba0ae449a7c83d5cfa8fe6b69f83b3b6d6 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -530,6 +530,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } + flags |= IRQD_AFFINITY_SET; mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 4ad5ed8adf96910395d2e62e8e23c78af815b1f6..6dc76b590703edd263f8e88c9f15da09f4b66aa7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -236,7 +236,7 @@ void static_key_disable_cpuslocked(struct static_key *key) } jump_label_lock(); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); jump_label_unlock(); } @@ -289,7 +289,7 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key) return; guard(mutex)(&jump_label_mutex); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); else WARN_ON_ONCE(!static_key_slow_try_dec(key)); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fb2c77368d187eda2a3a9083f3cb4c11f2b78ebb..a9a0ca605d4a8c82c5575706443287120ee6edc4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static void cleanup_symbol_name(char *s) -{ - char *res; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return; - - /* - * LLVM appends various suffixes for local functions and variables that - * must be promoted to global scope as part of LTO. This can break - * hooking of static functions with kprobes. '.' is not a valid - * character in an identifier in C. Suffixes only in LLVM LTO observed: - * - foo.llvm.[0-9a-f]+ - */ - res = strstr(s, ".llvm."); - if (res) - *res = '\0'; - - return; -} - -static int compare_symbol_name(const char *name, char *namebuf) -{ - /* The kallsyms_seqs_of_names is sorted based on names after - * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled. - * To ensure correct bisection in kallsyms_lookup_names(), do - * cleanup_symbol_name(namebuf) before comparing name and namebuf. - */ - cleanup_symbol_name(namebuf); - return strcmp(name, namebuf); -} - static unsigned int get_symbol_seq(int index) { unsigned int i, seq = 0; @@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(mid); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - ret = compare_symbol_name(name, namebuf); + ret = strcmp(name, namebuf); if (ret > 0) low = mid + 1; else if (ret < 0) @@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(low - 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; low--; } @@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(high + 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; high++; } @@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = strlen(namebuf); - goto found; + return strlen(namebuf); } /* See if it's in a module or a BPF JITed image. */ @@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = ftrace_mod_address_lookup(addr, symbolsize, offset, modname, namebuf); -found: - cleanup_symbol_name(namebuf); return ret; } @@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr, int lookup_symbol_name(unsigned long addr, char *symname) { - int res; - symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - goto found; + return 0; } /* See if it's in a module. */ - res = lookup_module_symbol_name(addr, symname); - if (res) - return res; - -found: - cleanup_symbol_name(symname); - return 0; + return lookup_module_symbol_name(addr, symname); } /* Look up a kernel symbol and return it in a text buffer. */ diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index 2f84896a7bcbdf873be7fa48d05eabe58334bb1a..873f7c445488c17390cff0c21d308aeb8840c27e 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void) stat.min, stat.max, div_u64(stat.sum, stat.real_cnt)); } -static bool match_cleanup_name(const char *s, const char *name) -{ - char *p; - int len; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; - - p = strstr(s, ".llvm."); - if (!p) - return false; - - len = strlen(name); - if (p - s != len) - return false; - - return !strncmp(s, name, len); -} - static int find_symbol(void *data, const char *name, unsigned long addr) { struct test_stat *stat = (struct test_stat *)data; - if (strcmp(name, stat->name) == 0 || - (!stat->perf && match_cleanup_name(name, stat->name))) { + if (!strcmp(name, stat->name)) { stat->real_cnt++; stat->addr = addr; diff --git a/kernel/kcov.c b/kernel/kcov.c index f0a69d402066e12cdbf99f589ac6bbca02896978..274b6b7c718de6ba3eb90d3593ac494b30b25688 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area, kmsan_unpoison_memory(&area->list, sizeof(area->list)); } +/* + * Unlike in_serving_softirq(), this function returns false when called during + * a hardirq or an NMI that happened in the softirq context. + */ +static inline bool in_softirq_really(void) +{ + return in_serving_softirq() && !in_hardirq() && !in_nmi(); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * so we ignore code executed in interrupts, unless we are in a remote * coverage collection section in a softirq. */ - if (!in_task() && !(in_serving_softirq() && t->kcov_softirq)) + if (!in_task() && !(in_softirq_really() && t->kcov_softirq)) return false; mode = READ_ONCE(t->kcov_mode); /* @@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle) if (WARN_ON(!kcov_check_handle(handle, true, true, true))) return; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); @@ -991,7 +1000,7 @@ void kcov_remote_stop(void) int sequence; unsigned long flags; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e85de37d9e1e01b2dde77c8e63cc4e18673411f6..da59c68df8412c4662d39d3e286d516e8cee9a69 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1557,8 +1557,8 @@ static bool is_cfi_preamble_symbol(unsigned long addr) if (lookup_symbol_name(addr, symbuf)) return false; - return str_has_prefix("__cfi_", symbuf) || - str_has_prefix("__pfx_", symbuf); + return str_has_prefix(symbuf, "__cfi_") || + str_has_prefix(symbuf, "__pfx_"); } static int check_kprobe_address_safe(struct kprobe *p, diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 07fb5987b42bdfeb52a66a87954de0c7f0bc0c40..1bab21b4718fffd9fa1c76d46827f44af870dd9d 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -92,7 +92,14 @@ static ssize_t profiling_store(struct kobject *kobj, const char *buf, size_t count) { int ret; + static DEFINE_MUTEX(lock); + /* + * We need serialization, for profile_setup() initializes prof_on + * value and profile_init() must not reallocate prof_buffer after + * once allocated. + */ + guard(mutex)(&lock); if (prof_on) return -EEXIST; /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 58c88220a478ac59a00dc83be03ed260346559e6..0349f957e672da9ede0c3e6ff24892dffa8118e2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, ip); @@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, _RET_IP_); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index f5a36e67b59355a21a3f4befaf2248236286ff92..ac2e2250274105b118911958f9bcc8e703e33589 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -357,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - enum vcpu_state old = vcpu_halted; + u8 old = vcpu_halted; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will diff --git a/kernel/module/main.c b/kernel/module/main.c index d9592195c5bbb4261ab6993a4ad56e7513f6331d..71396e297499f47f9871dbac48ccc5a79f8db65a 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3104,7 +3104,7 @@ static bool idempotent(struct idempotent *u, const void *cookie) struct idempotent *existing; bool first; - u->ret = 0; + u->ret = -EINTR; u->cookie = cookie; init_completion(&u->complete); @@ -3140,7 +3140,7 @@ static int idempotent_complete(struct idempotent *u, int ret) hlist_for_each_entry_safe(pos, next, head, entry) { if (pos->cookie != cookie) continue; - hlist_del(&pos->entry); + hlist_del_init(&pos->entry); pos->ret = ret; complete(&pos->complete); } @@ -3148,6 +3148,28 @@ static int idempotent_complete(struct idempotent *u, int ret) return ret; } +/* + * Wait for the idempotent worker. + * + * If we get interrupted, we need to remove ourselves from the + * the idempotent list, and the completion may still come in. + * + * The 'idem_lock' protects against the race, and 'idem.ret' was + * initialized to -EINTR and is thus always the right return + * value even if the idempotent work then completes between + * the wait_for_completion and the cleanup. + */ +static int idempotent_wait_for_completion(struct idempotent *u) +{ + if (wait_for_completion_interruptible(&u->complete)) { + spin_lock(&idem_lock); + if (!hlist_unhashed(&u->entry)) + hlist_del(&u->entry); + spin_unlock(&idem_lock); + } + return u->ret; +} + static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; @@ -3183,15 +3205,16 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int if (!f || !(f->f_mode & FMODE_READ)) return -EBADF; - /* See if somebody else is doing the operation? */ - if (idempotent(&idem, file_inode(f))) { - wait_for_completion(&idem.complete); - return idem.ret; + /* Are we the winners of the race and get to do this? */ + if (!idempotent(&idem, file_inode(f))) { + int ret = init_module_from_file(f, uargs, flags); + return idempotent_complete(&idem, ret); } - /* Otherwise, we'll do it and complete others */ - return idempotent_complete(&idem, - init_module_from_file(f, uargs, flags)); + /* + * Somebody else won the race and is loading the module. + */ + return idempotent_wait_for_completion(&idem); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) diff --git a/kernel/padata.c b/kernel/padata.c index 53f4bc9127127c287bad71da7d870cbb42c51d6a..0fa6c289546032727595a1ccbdc2e48d3ceb342d 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -517,6 +517,13 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) ps.chunk_size = max(ps.chunk_size, job->min_chunk); ps.chunk_size = roundup(ps.chunk_size, job->align); + /* + * chunk_size can be 0 if the caller sets min_chunk to 0. So force it + * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().` + */ + if (!ps.chunk_size) + ps.chunk_size = 1U; + list_for_each_entry(pw, &works, pw_list) if (job->numa_aware) { int old_node = atomic_read(&last_used_nid); diff --git a/kernel/panic.c b/kernel/panic.c index f861bedc1925e748e6589b0196388784ceba302d..2a0449144f82ea8c3daef723b56f63a22e128657 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,8 @@ unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +bool panic_triggering_all_cpu_backtrace; + int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) + if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + /* Temporary allow non-panic CPUs to write their backtraces. */ + panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); + panic_triggering_all_cpu_backtrace = false; + } /* * Note that smp_send_stop() is the usual SMP shutdown function, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 054c0e7784fdfdcb1b3f11a67ccd82289eec810a..c22b07049c382c2174416adfd3aab9afab5a85a9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; if (level == LOGLEVEL_SCHED) { diff --git a/kernel/profile.c b/kernel/profile.c index 2b775cc5c28f96cebe33e6d9c55a001d0debd6ce..1fcf1adcf4eb033ef0a5e2960c63404700b94519 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -47,30 +47,14 @@ static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); -static cpumask_var_t prof_cpu_mask; -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); -static DEFINE_PER_CPU(int, cpu_profile_flip); -static DEFINE_MUTEX(profile_flip_mutex); -#endif /* CONFIG_SMP */ - int profile_setup(char *str) { static const char schedstr[] = "schedule"; - static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; const char *select = NULL; int par; - if (!strncmp(str, sleepstr, strlen(sleepstr))) { -#ifdef CONFIG_SCHEDSTATS - force_schedstat_enabled(); - prof_on = SLEEP_PROFILING; - select = sleepstr; -#else - pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); -#endif /* CONFIG_SCHEDSTATS */ - } else if (!strncmp(str, schedstr, strlen(schedstr))) { + if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { @@ -114,11 +98,6 @@ int __ref profile_init(void) buffer_bytes = prof_len*sizeof(atomic_t); - if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(prof_cpu_mask, cpu_possible_mask); - prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN); if (prof_buffer) return 0; @@ -132,195 +111,16 @@ int __ref profile_init(void) if (prof_buffer) return 0; - free_cpumask_var(prof_cpu_mask); return -ENOMEM; } -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -/* - * Each cpu has a pair of open-addressed hashtables for pending - * profile hits. read_profile() IPI's all cpus to request them - * to flip buffers and flushes their contents to prof_buffer itself. - * Flip requests are serialized by the profile_flip_mutex. The sole - * use of having a second hashtable is for avoiding cacheline - * contention that would otherwise happen during flushes of pending - * profile hits required for the accuracy of reported profile hits - * and so resurrect the interrupt livelock issue. - * - * The open-addressed hashtables are indexed by profile buffer slot - * and hold the number of pending hits to that profile buffer slot on - * a cpu in an entry. When the hashtable overflows, all pending hits - * are accounted to their corresponding profile buffer slots with - * atomic_add() and the hashtable emptied. As numerous pending hits - * may be accounted to a profile buffer slot in a hashtable entry, - * this amortizes a number of atomic profile buffer increments likely - * to be far larger than the number of entries in the hashtable, - * particularly given that the number of distinct profile buffer - * positions to which hits are accounted during short intervals (e.g. - * several seconds) is usually very small. Exclusion from buffer - * flipping is provided by interrupt disablement (note that for - * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from - * process context). - * The hash function is meant to be lightweight as opposed to strong, - * and was vaguely inspired by ppc64 firmware-supported inverted - * pagetable hash functions, but uses a full hashtable full of finite - * collision chains, not just pairs of them. - * - * -- nyc - */ -static void __profile_flip_buffers(void *unused) -{ - int cpu = smp_processor_id(); - - per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu); -} - -static void profile_flip_buffers(void) -{ - int i, j, cpu; - - mutex_lock(&profile_flip_mutex); - j = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; - for (i = 0; i < NR_PROFILE_HIT; ++i) { - if (!hits[i].hits) { - if (hits[i].pc) - hits[i].pc = 0; - continue; - } - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].hits = hits[i].pc = 0; - } - } - mutex_unlock(&profile_flip_mutex); -} - -static void profile_discard_flip_buffers(void) -{ - int i, cpu; - - mutex_lock(&profile_flip_mutex); - i = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; - memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); - } - mutex_unlock(&profile_flip_mutex); -} - -static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) -{ - unsigned long primary, secondary, flags, pc = (unsigned long)__pc; - int i, j, cpu; - struct profile_hit *hits; - - pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); - i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - cpu = get_cpu(); - hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; - if (!hits) { - put_cpu(); - return; - } - /* - * We buffer the global profiler buffer into a per-CPU - * queue and thus reduce the number of global (and possibly - * NUMA-alien) accesses. The write-queue is self-coalescing: - */ - local_irq_save(flags); - do { - for (j = 0; j < PROFILE_GRPSZ; ++j) { - if (hits[i + j].pc == pc) { - hits[i + j].hits += nr_hits; - goto out; - } else if (!hits[i + j].hits) { - hits[i + j].pc = pc; - hits[i + j].hits = nr_hits; - goto out; - } - } - i = (i + secondary) & (NR_PROFILE_HIT - 1); - } while (i != primary); - - /* - * Add the current hit(s) and flush the write-queue out - * to the global buffer: - */ - atomic_add(nr_hits, &prof_buffer[pc]); - for (i = 0; i < NR_PROFILE_HIT; ++i) { - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].pc = hits[i].hits = 0; - } -out: - local_irq_restore(flags); - put_cpu(); -} - -static int profile_dead_cpu(unsigned int cpu) -{ - struct page *page; - int i; - - if (cpumask_available(prof_cpu_mask)) - cpumask_clear_cpu(cpu, prof_cpu_mask); - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); - per_cpu(cpu_profile_hits, cpu)[i] = NULL; - __free_page(page); - } - } - return 0; -} - -static int profile_prepare_cpu(unsigned int cpu) -{ - int i, node = cpu_to_mem(cpu); - struct page *page; - - per_cpu(cpu_profile_flip, cpu) = 0; - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) - continue; - - page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); - if (!page) { - profile_dead_cpu(cpu); - return -ENOMEM; - } - per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); - - } - return 0; -} - -static int profile_online_cpu(unsigned int cpu) -{ - if (cpumask_available(prof_cpu_mask)) - cpumask_set_cpu(cpu, prof_cpu_mask); - - return 0; -} - -#else /* !CONFIG_SMP */ -#define profile_flip_buffers() do { } while (0) -#define profile_discard_flip_buffers() do { } while (0) - static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; - atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); + if (pc < prof_len) + atomic_add(nr_hits, &prof_buffer[pc]); } -#endif /* !CONFIG_SMP */ void profile_hits(int type, void *__pc, unsigned int nr_hits) { @@ -334,8 +134,8 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && - cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) + /* This is the old kernel-only legacy profiling */ + if (!user_mode(regs)) profile_hit(type, (void *)profile_pc(regs)); } @@ -358,7 +158,6 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) char *pnt; unsigned long sample_step = 1UL << prof_shift; - profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) return 0; if (count > (prof_len+1)*sizeof(unsigned int) - p) @@ -404,7 +203,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return -EINVAL; } #endif - profile_discard_flip_buffers(); memset(prof_buffer, 0, prof_len * sizeof(atomic_t)); return count; } @@ -418,40 +216,14 @@ static const struct proc_ops profile_proc_ops = { int __ref create_proc_profile(void) { struct proc_dir_entry *entry; -#ifdef CONFIG_SMP - enum cpuhp_state online_state; -#endif - int err = 0; if (!prof_on) return 0; -#ifdef CONFIG_SMP - err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", - profile_prepare_cpu, profile_dead_cpu); - if (err) - return err; - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", - profile_online_cpu, NULL); - if (err < 0) - goto err_state_prep; - online_state = err; - err = 0; -#endif entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &profile_proc_ops); - if (!entry) - goto err_state_onl; - proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); - - return err; -err_state_onl: -#ifdef CONFIG_SMP - cpuhp_remove_state(online_state); -err_state_prep: - cpuhp_remove_state(CPUHP_PROFILE_PREPARE); -#endif + if (entry) + proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); return err; } subsys_initcall(create_proc_profile); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a9f655025607b977a790adbc7f33b1265fce6636..f3951e4a55e5b6078142f2f2b44359b7560e6e4f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7845,6 +7845,30 @@ void set_rq_offline(struct rq *rq) } } +static inline void sched_set_rq_online(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_online(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + +static inline void sched_set_rq_offline(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + /* * used to mark begin/end of suspend/resume: */ @@ -7895,10 +7919,25 @@ static int cpuset_cpu_inactive(unsigned int cpu) return 0; } +static inline void sched_smt_present_inc(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_inc_cpuslocked(&sched_smt_present); +#endif +} + +static inline void sched_smt_present_dec(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_dec_cpuslocked(&sched_smt_present); +#endif +} + int sched_cpu_activate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; /* * Clear the balance_push callback and prepare to schedule @@ -7906,13 +7945,10 @@ int sched_cpu_activate(unsigned int cpu) */ balance_push_set(cpu, false); -#ifdef CONFIG_SCHED_SMT /* * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_inc_cpuslocked(&sched_smt_present); -#endif + sched_smt_present_inc(cpu); set_cpu_active(cpu, true); if (sched_smp_initialized) { @@ -7930,12 +7966,7 @@ int sched_cpu_activate(unsigned int cpu) * 2) At runtime, if cpuset_cpu_active() fails to rebuild the * domains. */ - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_online(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_online(rq, cpu); return 0; } @@ -7943,7 +7974,6 @@ int sched_cpu_activate(unsigned int cpu) int sched_cpu_deactivate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; int ret; /* @@ -7974,20 +8004,14 @@ int sched_cpu_deactivate(unsigned int cpu) */ synchronize_rcu(); - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_offline(rq, cpu); -#ifdef CONFIG_SCHED_SMT /* * When going down, decrement the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_dec_cpuslocked(&sched_smt_present); + sched_smt_present_dec(cpu); +#ifdef CONFIG_SCHED_SMT sched_core_cpu_deactivate(cpu); #endif @@ -7997,6 +8021,8 @@ int sched_cpu_deactivate(unsigned int cpu) sched_update_numa(cpu, false); ret = cpuset_cpu_inactive(cpu); if (ret) { + sched_smt_present_inc(cpu); + sched_set_rq_online(rq, cpu); balance_push_set(cpu, false); set_cpu_active(cpu, true); sched_update_numa(cpu, true); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a5e00293ae433077920ea7cc0b3a73c7e59f5dc5..0bed0fa1acd988a0b29aed21fa9f77e908795029 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, } stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); + /* + * Because mul_u64_u64_div_u64() can approximate on some + * achitectures; enforce the constraint that: a*b/(b+c) <= a. + */ + if (unlikely(stime > rtime)) + stime = rtime; update: /* diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 78e48f5426ee1f8157e32381d32b79cfd5990373..eb0cdcd4d9212e3a5ce1696f4257f93c66dacfb2 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -92,16 +92,6 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, trace_sched_stat_blocked(p, delta); - /* - * Blocking time is in units of nanosecs, so shift by - * 20 to get a milliseconds-range estimation of the - * amount of time that the task spent sleeping: - */ - if (unlikely(prof_on == SLEEP_PROFILING)) { - profile_hits(SLEEP_PROFILING, - (void *)get_wchan(p), - delta >> 20); - } account_scheduler_latency(p, delta >> 10, 0); } } diff --git a/kernel/task_work.c b/kernel/task_work.c index 5c2daa7ad3f906aefa5a521b36ce49d30257725d..5d14d639ac71b54a24dac49acd02f6c2c7e58e04 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -6,12 +6,14 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +#ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); +#endif /** * task_work_add - ask the @task to execute @work->func() @@ -57,6 +59,8 @@ int task_work_add(struct task_struct *task, struct callback_head *work, if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; + if (!IS_ENABLED(CONFIG_IRQ_WORK)) + return -EINVAL; } else { /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); @@ -81,9 +85,11 @@ int task_work_add(struct task_struct *task, struct callback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; +#ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; +#endif default: WARN_ON_ONCE(1); break; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d25ba49e313cca182029e272b7bac004773f92a8..d0538a75f4c63ac1e9c8eb36f86eeb8f3b8e87d2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -246,7 +246,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end); if (wd_delay <= WATCHDOG_MAX_SKEW) { - if (nretries > 1 || nretries >= max_retries) { + if (nretries > 1 && nretries >= max_retries) { pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 406dccb79c2b6b5dde3c619e3107afd9925ab7ff..8d2dd214ec68227814a880cd5e1f73c6700c0d4e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -727,17 +727,16 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, } if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; + time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; + time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; + time_constant = clamp(txc->constant, 0, MAXTC); if (!(time_status & STA_NANO)) time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); + time_constant = clamp(time_constant, 0, MAXTC); } if (txc->modes & ADJ_TAI && diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b4843099a8da74ad79903e8ea9d59b260006f277..ed58eebb4e8f423cf0af169b17b09d94061cbb6a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1141,7 +1141,6 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1167,6 +1166,8 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) * device to avoid the starvation. */ if (tick_check_broadcast_expired()) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); tick_program_event(td->evtdev->next_event, 1); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2fa87dcfeda9c2f16b3613544ce10f363145d181..5391e4167d60226dfc48c845170e36bcbeb7b292 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2606,7 +2606,7 @@ int do_adjtimex(struct __kernel_timex *txc) clock_set |= timekeeping_advance(TK_ADV_FREQ); if (clock_set) - clock_was_set(CLOCK_REALTIME); + clock_was_set(CLOCK_SET_WALL); ntp_notify_cmos_timer(); diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 84413114db5c5b34e7b11ceaaeb1167a2ad04635..8d57f7686bb03a1f2e81365348f450233c8cd7a1 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -475,9 +475,54 @@ static bool tmigr_check_lonely(struct tmigr_group *group) return bitmap_weight(&active, BIT_CNT) <= 1; } -typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, void *); +/** + * struct tmigr_walk - data required for walking the hierarchy + * @nextexp: Next CPU event expiry information which is handed into + * the timer migration code by the timer code + * (get_next_timer_interrupt()) + * @firstexp: Contains the first event expiry information when + * hierarchy is completely idle. When CPU itself was the + * last going idle, information makes sure, that CPU will + * be back in time. When using this value in the remote + * expiry case, firstexp is stored in the per CPU tmigr_cpu + * struct of CPU which expires remote timers. It is updated + * in top level group only. Be aware, there could occur a + * new top level of the hierarchy between the 'top level + * call' in tmigr_update_events() and the check for the + * parent group in walk_groups(). Then @firstexp might + * contain a value != KTIME_MAX even if it was not the + * final top level. This is not a problem, as the worst + * outcome is a CPU which might wake up a little early. + * @evt: Pointer to tmigr_event which needs to be queued (of idle + * child group) + * @childmask: groupmask of child group + * @remote: Is set, when the new timer path is executed in + * tmigr_handle_remote_cpu() + * @basej: timer base in jiffies + * @now: timer base monotonic + * @check: is set if there is the need to handle remote timers; + * required in tmigr_requires_handle_remote() only + * @tmc_active: this flag indicates, whether the CPU which triggers + * the hierarchy walk is !idle in the timer migration + * hierarchy. When the CPU is idle and the whole hierarchy is + * idle, only the first event of the top level has to be + * considered. + */ +struct tmigr_walk { + u64 nextexp; + u64 firstexp; + struct tmigr_event *evt; + u8 childmask; + bool remote; + unsigned long basej; + u64 now; + bool check; + bool tmc_active; +}; + +typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, struct tmigr_walk *); -static void __walk_groups(up_f up, void *data, +static void __walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) { struct tmigr_group *child = NULL, *group = tmc->tmgroup; @@ -490,64 +535,17 @@ static void __walk_groups(up_f up, void *data, child = group; group = group->parent; + data->childmask = child->groupmask; } while (group); } -static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc) +static void walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) { lockdep_assert_held(&tmc->lock); __walk_groups(up, data, tmc); } -/** - * struct tmigr_walk - data required for walking the hierarchy - * @nextexp: Next CPU event expiry information which is handed into - * the timer migration code by the timer code - * (get_next_timer_interrupt()) - * @firstexp: Contains the first event expiry information when last - * active CPU of hierarchy is on the way to idle to make - * sure CPU will be back in time. - * @evt: Pointer to tmigr_event which needs to be queued (of idle - * child group) - * @childmask: childmask of child group - * @remote: Is set, when the new timer path is executed in - * tmigr_handle_remote_cpu() - */ -struct tmigr_walk { - u64 nextexp; - u64 firstexp; - struct tmigr_event *evt; - u8 childmask; - bool remote; -}; - -/** - * struct tmigr_remote_data - data required for remote expiry hierarchy walk - * @basej: timer base in jiffies - * @now: timer base monotonic - * @firstexp: returns expiry of the first timer in the idle timer - * migration hierarchy to make sure the timer is handled in - * time; it is stored in the per CPU tmigr_cpu struct of - * CPU which expires remote timers - * @childmask: childmask of child group - * @check: is set if there is the need to handle remote timers; - * required in tmigr_requires_handle_remote() only - * @tmc_active: this flag indicates, whether the CPU which triggers - * the hierarchy walk is !idle in the timer migration - * hierarchy. When the CPU is idle and the whole hierarchy is - * idle, only the first event of the top level has to be - * considered. - */ -struct tmigr_remote_data { - unsigned long basej; - u64 now; - u64 firstexp; - u8 childmask; - bool check; - bool tmc_active; -}; - /* * Returns the next event of the timerqueue @group->events * @@ -618,10 +616,9 @@ static u64 tmigr_next_groupevt_expires(struct tmigr_group *group) static bool tmigr_active_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { union tmigr_state curstate, newstate; - struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; @@ -649,8 +646,7 @@ static bool tmigr_active_up(struct tmigr_group *group, } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); - if ((walk_done == false) && group->parent) - data->childmask = group->childmask; + trace_tmigr_group_set_cpu_active(group, newstate, childmask); /* * The group is active (again). The group event might be still queued @@ -666,8 +662,6 @@ static bool tmigr_active_up(struct tmigr_group *group, */ group->groupevt.ignore = true; - trace_tmigr_group_set_cpu_active(group, newstate, childmask); - return walk_done; } @@ -675,7 +669,7 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc) { struct tmigr_walk data; - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; trace_tmigr_cpu_active(tmc); @@ -860,10 +854,8 @@ unlock: static bool tmigr_new_timer_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_walk *data = ptr; - return tmigr_update_events(group, child, data); } @@ -995,9 +987,8 @@ unlock: static bool tmigr_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_remote_data *data = ptr; struct tmigr_event *evt; unsigned long jif; u8 childmask; @@ -1034,12 +1025,10 @@ again: } /* - * Update of childmask for the next level and keep track of the expiry - * of the first event that needs to be handled (group->next_expiry was - * updated by tmigr_next_expired_groupevt(), next was set by - * tmigr_handle_remote_cpu()). + * Keep track of the expiry of the first event that needs to be handled + * (group->next_expiry was updated by tmigr_next_expired_groupevt(), + * next was set by tmigr_handle_remote_cpu()). */ - data->childmask = group->childmask; data->firstexp = group->next_expiry; raw_spin_unlock_irq(&group->lock); @@ -1055,12 +1044,12 @@ again: void tmigr_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - struct tmigr_remote_data data; + struct tmigr_walk data; if (tmigr_is_not_available(tmc)) return; - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; data.firstexp = KTIME_MAX; /* @@ -1068,7 +1057,7 @@ void tmigr_handle_remote(void) * in tmigr_handle_remote_up() anyway. Keep this check to speed up the * return when nothing has to be done. */ - if (!tmigr_check_migrator(tmc->tmgroup, tmc->childmask)) { + if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { /* * If this CPU was an idle migrator, make sure to clear its wakeup * value so it won't chase timers that have already expired elsewhere. @@ -1097,9 +1086,8 @@ void tmigr_handle_remote(void) static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_remote_data *data = ptr; u8 childmask; childmask = data->childmask; @@ -1118,7 +1106,7 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, * group before reading the next_expiry value. */ if (group->parent && !data->tmc_active) - goto out; + return false; /* * The lock is required on 32bit architectures to read the variable @@ -1143,9 +1131,6 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, raw_spin_unlock(&group->lock); } -out: - /* Update of childmask for the next level */ - data->childmask = group->childmask; return false; } @@ -1157,7 +1142,7 @@ out: bool tmigr_requires_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - struct tmigr_remote_data data; + struct tmigr_walk data; unsigned long jif; bool ret = false; @@ -1165,7 +1150,7 @@ bool tmigr_requires_handle_remote(void) return ret; data.now = get_jiffies_update(&jif); - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; data.firstexp = KTIME_MAX; data.tmc_active = !tmc->idle; data.check = false; @@ -1230,14 +1215,13 @@ u64 tmigr_cpu_new_timer(u64 nextexp) if (nextexp != tmc->cpuevt.nextevt.expires || tmc->cpuevt.ignore) { ret = tmigr_new_timer(tmc, nextexp); + /* + * Make sure the reevaluation of timers in idle path + * will not miss an event. + */ + WRITE_ONCE(tmc->wakeup, ret); } } - /* - * Make sure the reevaluation of timers in idle path will not miss an - * event. - */ - WRITE_ONCE(tmc->wakeup, ret); - trace_tmigr_cpu_new_timer_idle(tmc, nextexp); raw_spin_unlock(&tmc->lock); return ret; @@ -1245,10 +1229,9 @@ u64 tmigr_cpu_new_timer(u64 nextexp) static bool tmigr_inactive_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { union tmigr_state curstate, newstate, childstate; - struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; @@ -1299,9 +1282,10 @@ static bool tmigr_inactive_up(struct tmigr_group *group, WARN_ON_ONCE((newstate.migrator != TMIGR_NONE) && !(newstate.active)); - if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, - newstate.state)) + if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { + trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); break; + } /* * The memory barrier is paired with the cmpxchg() in @@ -1317,22 +1301,6 @@ static bool tmigr_inactive_up(struct tmigr_group *group, /* Event Handling */ tmigr_update_events(group, child, data); - if (group->parent && (walk_done == false)) - data->childmask = group->childmask; - - /* - * data->firstexp was set by tmigr_update_events() and contains the - * expiry of the first global event which needs to be handled. It - * differs from KTIME_MAX if: - * - group is the top level group and - * - group is idle (which means CPU was the last active CPU in the - * hierarchy) and - * - there is a pending event in the hierarchy - */ - WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent); - - trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); - return walk_done; } @@ -1341,7 +1309,7 @@ static u64 __tmigr_cpu_deactivate(struct tmigr_cpu *tmc, u64 nextexp) struct tmigr_walk data = { .nextexp = nextexp, .firstexp = KTIME_MAX, .evt = &tmc->cpuevt, - .childmask = tmc->childmask }; + .childmask = tmc->groupmask }; /* * If nextexp is KTIME_MAX, the CPU event will be ignored because the @@ -1400,7 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp) * the only one in the level 0 group; and if it is the * only one in level 0 group, but there are more than a * single group active on the way to top level) - * * nextevt - when CPU is offline and has to handle timer on his own + * * nextevt - when CPU is offline and has to handle timer on its own * or when on the way to top in every group only a single * child is active but @nextevt is before the lowest * next_expiry encountered while walking up to top level. @@ -1419,7 +1387,7 @@ u64 tmigr_quick_check(u64 nextevt) if (WARN_ON_ONCE(tmc->idle)) return nextevt; - if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->childmask)) + if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) return KTIME_MAX; do { @@ -1442,6 +1410,66 @@ u64 tmigr_quick_check(u64 nextevt) return KTIME_MAX; } +/* + * tmigr_trigger_active() - trigger a CPU to become active again + * + * This function is executed on a CPU which is part of cpu_online_mask, when the + * last active CPU in the hierarchy is offlining. With this, it is ensured that + * the other CPU is active and takes over the migrator duty. + */ +static long tmigr_trigger_active(void *unused) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + + WARN_ON_ONCE(!tmc->online || tmc->idle); + + return 0; +} + +static int tmigr_cpu_offline(unsigned int cpu) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + int migrator; + u64 firstexp; + + raw_spin_lock_irq(&tmc->lock); + tmc->online = false; + WRITE_ONCE(tmc->wakeup, KTIME_MAX); + + /* + * CPU has to handle the local events on his own, when on the way to + * offline; Therefore nextevt value is set to KTIME_MAX + */ + firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); + trace_tmigr_cpu_offline(tmc); + raw_spin_unlock_irq(&tmc->lock); + + if (firstexp != KTIME_MAX) { + migrator = cpumask_any_but(cpu_online_mask, cpu); + work_on_cpu(migrator, tmigr_trigger_active, NULL); + } + + return 0; +} + +static int tmigr_cpu_online(unsigned int cpu) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + + /* Check whether CPU data was successfully initialized */ + if (WARN_ON_ONCE(!tmc->tmgroup)) + return -EINVAL; + + raw_spin_lock_irq(&tmc->lock); + trace_tmigr_cpu_online(tmc); + tmc->idle = timer_base_is_idle(); + if (!tmc->idle) + __tmigr_cpu_activate(tmc); + tmc->online = true; + raw_spin_unlock_irq(&tmc->lock); + return 0; +} + static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, int node) { @@ -1514,21 +1542,25 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node, } static void tmigr_connect_child_parent(struct tmigr_group *child, - struct tmigr_group *parent) + struct tmigr_group *parent, + bool activate) { - union tmigr_state childstate; + struct tmigr_walk data; raw_spin_lock_irq(&child->lock); raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); child->parent = parent; - child->childmask = BIT(parent->num_children++); + child->groupmask = BIT(parent->num_children++); raw_spin_unlock(&parent->lock); raw_spin_unlock_irq(&child->lock); trace_tmigr_connect_child_parent(child); + if (!activate) + return; + /* * To prevent inconsistent states, active children need to be active in * the new parent as well. Inactive children are already marked inactive @@ -1544,21 +1576,24 @@ static void tmigr_connect_child_parent(struct tmigr_group *child, * child to the new parent. So tmigr_connect_child_parent() is * executed with the formerly top level group (child) and the newly * created group (parent). + * + * * It is ensured that the child is active, as this setup path is + * executed in hotplug prepare callback. This is exectued by an + * already connected and !idle CPU. Even if all other CPUs go idle, + * the CPU executing the setup will be responsible up to current top + * level group. And the next time it goes inactive, it will release + * the new childmask and parent to subsequent walkers through this + * @child. Therefore propagate active state unconditionally. */ - childstate.state = atomic_read(&child->migr_state); - if (childstate.migrator != TMIGR_NONE) { - struct tmigr_walk data; - - data.childmask = child->childmask; + data.childmask = child->groupmask; - /* - * There is only one new level per time. When connecting the - * child and the parent and set the child active when the parent - * is inactive, the parent needs to be the uppermost - * level. Otherwise there went something wrong! - */ - WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); - } + /* + * There is only one new level per time (which is protected by + * tmigr_mutex). When connecting the child and the parent and set the + * child active when the parent is inactive, the parent needs to be the + * uppermost level. Otherwise there went something wrong! + */ + WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); } static int tmigr_setup_groups(unsigned int cpu, unsigned int node) @@ -1611,12 +1646,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) * Update tmc -> group / child -> group connection */ if (i == 0) { - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); raw_spin_lock_irq(&group->lock); tmc->tmgroup = group; - tmc->childmask = BIT(group->num_children++); + tmc->groupmask = BIT(group->num_children++); raw_spin_unlock_irq(&group->lock); @@ -1626,7 +1661,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) continue; } else { child = stack[i - 1]; - tmigr_connect_child_parent(child, group); + /* Will be activated at online time */ + tmigr_connect_child_parent(child, group, false); } /* check if uppermost level was newly created */ @@ -1637,12 +1673,21 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) lvllist = &tmigr_level_list[top]; if (group->num_children == 1 && list_is_singular(lvllist)) { + /* + * The target CPU must never do the prepare work, except + * on early boot when the boot CPU is the target. Otherwise + * it may spuriously activate the old top level group inside + * the new one (nevertheless whether old top level group is + * active or not) and/or release an uninitialized childmask. + */ + WARN_ON_ONCE(cpu == raw_smp_processor_id()); + lvllist = &tmigr_level_list[top - 1]; list_for_each_entry(child, lvllist, list) { if (child->parent) continue; - tmigr_connect_child_parent(child, group); + tmigr_connect_child_parent(child, group, true); } } } @@ -1664,80 +1709,31 @@ static int tmigr_add_cpu(unsigned int cpu) return ret; } -static int tmigr_cpu_online(unsigned int cpu) -{ - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - int ret; - - /* First online attempt? Initialize CPU data */ - if (!tmc->tmgroup) { - raw_spin_lock_init(&tmc->lock); - - ret = tmigr_add_cpu(cpu); - if (ret < 0) - return ret; - - if (tmc->childmask == 0) - return -EINVAL; - - timerqueue_init(&tmc->cpuevt.nextevt); - tmc->cpuevt.nextevt.expires = KTIME_MAX; - tmc->cpuevt.ignore = true; - tmc->cpuevt.cpu = cpu; - - tmc->remote = false; - WRITE_ONCE(tmc->wakeup, KTIME_MAX); - } - raw_spin_lock_irq(&tmc->lock); - trace_tmigr_cpu_online(tmc); - tmc->idle = timer_base_is_idle(); - if (!tmc->idle) - __tmigr_cpu_activate(tmc); - tmc->online = true; - raw_spin_unlock_irq(&tmc->lock); - return 0; -} - -/* - * tmigr_trigger_active() - trigger a CPU to become active again - * - * This function is executed on a CPU which is part of cpu_online_mask, when the - * last active CPU in the hierarchy is offlining. With this, it is ensured that - * the other CPU is active and takes over the migrator duty. - */ -static long tmigr_trigger_active(void *unused) +static int tmigr_cpu_prepare(unsigned int cpu) { - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); + int ret = 0; - WARN_ON_ONCE(!tmc->online || tmc->idle); - - return 0; -} - -static int tmigr_cpu_offline(unsigned int cpu) -{ - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - int migrator; - u64 firstexp; + /* Not first online attempt? */ + if (tmc->tmgroup) + return ret; - raw_spin_lock_irq(&tmc->lock); - tmc->online = false; + raw_spin_lock_init(&tmc->lock); + timerqueue_init(&tmc->cpuevt.nextevt); + tmc->cpuevt.nextevt.expires = KTIME_MAX; + tmc->cpuevt.ignore = true; + tmc->cpuevt.cpu = cpu; + tmc->remote = false; WRITE_ONCE(tmc->wakeup, KTIME_MAX); - /* - * CPU has to handle the local events on his own, when on the way to - * offline; Therefore nextevt value is set to KTIME_MAX - */ - firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); - trace_tmigr_cpu_offline(tmc); - raw_spin_unlock_irq(&tmc->lock); + ret = tmigr_add_cpu(cpu); + if (ret < 0) + return ret; - if (firstexp != KTIME_MAX) { - migrator = cpumask_any_but(cpu_online_mask, cpu); - work_on_cpu(migrator, tmigr_trigger_active, NULL); - } + if (tmc->groupmask == 0) + return -EINVAL; - return 0; + return ret; } static int __init tmigr_init(void) @@ -1796,6 +1792,11 @@ static int __init tmigr_init(void) tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP, tmigr_crossnode_level); + ret = cpuhp_setup_state(CPUHP_TMIGR_PREPARE, "tmigr:prepare", + tmigr_cpu_prepare, NULL); + if (ret) + goto err; + ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online", tmigr_cpu_online, tmigr_cpu_offline); if (ret) @@ -1807,4 +1808,4 @@ err: pr_err("Timer migration setup failed\n"); return ret; } -late_initcall(tmigr_init); +early_initcall(tmigr_init); diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h index 6c37d94a37d90a07b6ff49afff00fa033eec97df..154accc7a543c4d1a5f90980496c3471e48ff34e 100644 --- a/kernel/time/timer_migration.h +++ b/kernel/time/timer_migration.h @@ -22,7 +22,17 @@ struct tmigr_event { * struct tmigr_group - timer migration hierarchy group * @lock: Lock protecting the event information and group hierarchy * information during setup - * @parent: Pointer to the parent group + * @parent: Pointer to the parent group. Pointer is updated when a + * new hierarchy level is added because of a CPU coming + * online the first time. Once it is set, the pointer will + * not be removed or updated. When accessing parent pointer + * lock less to decide whether to abort a propagation or + * not, it is not a problem. The worst outcome is an + * unnecessary/early CPU wake up. But do not access parent + * pointer several times in the same 'action' (like + * activation, deactivation, check for remote expiry,...) + * without holding the lock as it is not ensured that value + * will not change. * @groupevt: Next event of the group which is only used when the * group is !active. The group event is then queued into * the parent timer queue. @@ -41,9 +51,8 @@ struct tmigr_event { * @num_children: Counter of group children to make sure the group is only * filled with TMIGR_CHILDREN_PER_GROUP; Required for setup * only - * @childmask: childmask of the group in the parent group; is set - * during setup and will never change; can be read - * lockless + * @groupmask: mask of the group in the parent group; is set during + * setup and will never change; can be read lockless * @list: List head that is added to the per level * tmigr_level_list; is required during setup when a * new group needs to be connected to the existing @@ -59,7 +68,7 @@ struct tmigr_group { unsigned int level; int numa_node; unsigned int num_children; - u8 childmask; + u8 groupmask; struct list_head list; }; @@ -79,7 +88,7 @@ struct tmigr_group { * hierarchy * @remote: Is set when timers of the CPU are expired remotely * @tmgroup: Pointer to the parent group - * @childmask: childmask of tmigr_cpu in the parent group + * @groupmask: mask of tmigr_cpu in the parent group * @wakeup: Stores the first timer when the timer migration * hierarchy is completely idle and remote expiry was done; * is returned to timer code in the idle path and is only @@ -92,7 +101,7 @@ struct tmigr_cpu { bool idle; bool remote; struct tmigr_group *tmgroup; - u8 childmask; + u8 groupmask; u64 wakeup; struct tmigr_event cpuevt; }; @@ -108,8 +117,8 @@ union tmigr_state { u32 state; /** * struct - split state of tmigr_group - * @active: Contains each childmask bit of the active children - * @migrator: Contains childmask of the child which is migrator + * @active: Contains each mask bit of the active children + * @migrator: Contains mask of the child which is migrator * @seq: Sequence counter needs to be increased when an update * to the tmigr_state is done. It prevents a race when * updates in the child groups are propagated in changed diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index fc205ad167a9b72b0f6440f9b0369732dc147fe5..d1d5ea2d0a1b706d1be876ae7172a4ada898b927 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -902,7 +902,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, i = *idx ? : task->curr_ret_stack; while (i > 0) { - ret_stack = get_ret_stack(current, i, &i); + ret_stack = get_ret_stack(task, i, &i); if (!ret_stack) break; /* diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index cb0871fbdb07f034465c51c59ef9ca06b84768de..314ffc143039c53da1c0612448901a8104cdd3d5 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -34,8 +34,6 @@ MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; -#define MIN(x, y) ((x) < (y) ? (x) : (y)) - static void busy_wait(ulong time) { u64 start, end; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 28853966aa9afff9fc6e9a6af90a3eec58902c60..cebd879a30cbd10b75945239d4eff05bd84538d5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -692,18 +692,6 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, return ts; } -/** - * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer - * @buffer: The ring_buffer to get the number of pages from - * @cpu: The cpu of the ring_buffer to get the number of pages from - * - * Returns the number of pages used by a per_cpu buffer of the ring buffer. - */ -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) -{ - return buffer->buffers[cpu]->nr_pages; -} - /** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10cd38bce2f1c47ca894782aa1265f8d715445f8..ebe7ce2f5f4a50f9402da8978e89b7db66592079 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, trace_access_unlock(iter->cpu_file); if (ret < 0) { - if (trace_empty(iter)) { + if (trace_empty(iter) && !iter->closed) { if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8783bebd0562e51aa572759746111efcbff9157f..bd3e3069300e267c2a0cb5a5e0e2e56ce1f2de83 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1634,6 +1634,29 @@ static inline void *event_file_data(struct file *filp) extern struct mutex event_mutex; extern struct list_head ftrace_events; +/* + * When the trace_event_file is the filp->i_private pointer, + * it must be taken under the event_mutex lock, and then checked + * if the EVENT_FILE_FL_FREED flag is set. If it is, then the + * data pointed to by the trace_event_file can not be trusted. + * + * Use the event_file_file() to access the trace_event_file from + * the filp the first time under the event_mutex and check for + * NULL. If it is needed to be retrieved again and the event_mutex + * is still held, then the event_file_data() can be used and it + * is guaranteed to be valid. + */ +static inline struct trace_event_file *event_file_file(struct file *filp) +{ + struct trace_event_file *file; + + lockdep_assert_held(&event_mutex); + file = READ_ONCE(file_inode(filp)->i_private); + if (!file || file->flags & EVENT_FILE_FL_FREED) + return NULL; + return file; +} + extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6ef29eba90ceb7e06774171d79c7ea244ecaa72a..7266ec2a4eea0016de17676edb17b4d08af4fe41 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -992,18 +992,18 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) void event_file_get(struct trace_event_file *file) { - atomic_inc(&file->ref); + refcount_inc(&file->ref); } void event_file_put(struct trace_event_file *file) { - if (WARN_ON_ONCE(!atomic_read(&file->ref))) { + if (WARN_ON_ONCE(!refcount_read(&file->ref))) { if (file->flags & EVENT_FILE_FL_FREED) kmem_cache_free(file_cachep, file); return; } - if (atomic_dec_and_test(&file->ref)) { + if (refcount_dec_and_test(&file->ref)) { /* Count should only go to zero when it is freed */ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) return; @@ -1386,12 +1386,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, char buf[4] = "0"; mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (likely(file)) flags = file->flags; mutex_unlock(&event_mutex); - if (!file || flags & EVENT_FILE_FL_FREED) + if (!file) return -ENODEV; if (flags & EVENT_FILE_FL_ENABLED && @@ -1424,8 +1424,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 1: ret = -ENODEV; mutex_lock(&event_mutex); - file = event_file_data(filp); - if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) { + file = event_file_file(filp); + if (likely(file)) { ret = tracing_update_buffers(file->tr); if (ret < 0) { mutex_unlock(&event_mutex); @@ -1540,7 +1540,8 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; @@ -1572,7 +1573,8 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct ftrace_event_field *field; const char *array_descriptor; @@ -1627,12 +1629,14 @@ static int f_show(struct seq_file *m, void *v) static void *f_start(struct seq_file *m, loff_t *pos) { + struct trace_event_file *file; void *p = (void *)FORMAT_HEADER; loff_t l = 0; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - if (!event_file_data(m->private)) + file = event_file_file(m->private); + if (!file) return ERR_PTR(-ENODEV); while (l < *pos && p) @@ -1706,8 +1710,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file && !(file->flags & EVENT_FILE_FL_FREED)) + file = event_file_file(filp); + if (file) print_event_filter(file, s); mutex_unlock(&event_mutex); @@ -1736,9 +1740,13 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return PTR_ERR(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file) - err = apply_event_filter(file, buf); + file = event_file_file(filp); + if (file) { + if (file->flags & EVENT_FILE_FL_FREED) + err = -ENODEV; + else + err = apply_event_filter(file, buf); + } mutex_unlock(&event_mutex); kfree(buf); @@ -2485,7 +2493,6 @@ static int event_callback(const char *name, umode_t *mode, void **data, if (strcmp(name, "format") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_event_format_fops; - *data = call; return 1; } @@ -2996,7 +3003,7 @@ trace_create_new_event(struct trace_event_call *call, atomic_set(&file->tm_ref, 0); INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); - event_file_get(file); + refcount_set(&file->ref, 1); return file; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6ece1308d36a02dec5af3ca3cebdb9d6b427aac7..5f9119eb7c67f6e1e11fcf2c8a37c311e1a0372d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5601,7 +5601,7 @@ static int hist_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; @@ -5880,7 +5880,7 @@ static int hist_debug_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 8650562bdaa9886e59a519aec516fc4b9209f0b5..a8f076809db4d58978847c7ad3f161dac220cf09 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -299,7 +299,7 @@ event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, strim(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (file) { call = file->event_call; size = parse_entry(buf, call, &entry); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4bec043c8690d382f8c493721e0f6e7a1ce1a91a..a5e3d6acf1e1e241ffeef791b9281c47840dfaea 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -159,7 +159,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos) /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) return ERR_PTR(-ENODEV); @@ -213,7 +213,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) mutex_lock(&event_mutex); - if (unlikely(!event_file_data(file))) { + if (unlikely(!event_file_file(file))) { mutex_unlock(&event_mutex); return -ENODEV; } @@ -293,7 +293,7 @@ static ssize_t event_trigger_regex_write(struct file *file, strim(buf); mutex_lock(&event_mutex); - event_file = event_file_data(file); + event_file = event_file_file(file); if (unlikely(!event_file)) { mutex_unlock(&event_mutex); kfree(buf); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index a4dcf0f2435213bc2b2b91d677ec18290aa53859..3a56e7c8aa4f67281e57bb6538dc961c4b65c670 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -454,7 +454,7 @@ static struct tracing_map_elt *get_free_elt(struct tracing_map *map) struct tracing_map_elt *elt = NULL; int idx; - idx = atomic_inc_return(&map->next_elt); + idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts); if (idx < map->max_elts) { elt = *(TRACING_MAP_ELT(map->elts, idx)); if (map->ops && map->ops->elt_init) @@ -699,7 +699,7 @@ void tracing_map_clear(struct tracing_map *map) { unsigned int i; - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); atomic64_set(&map->hits, 0); atomic64_set(&map->drops, 0); @@ -783,7 +783,7 @@ struct tracing_map *tracing_map_create(unsigned int map_bits, map->map_bits = map_bits; map->max_elts = (1 << map_bits); - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); map->map_size = (1 << (map_bits + 1)); map->ops = ops; diff --git a/lib/btree.c b/lib/btree.c index 49420cae3a8330ac650ff3114ee98cb2ed8e088a..bb81d3393ac5c52cd2a80e2712380e6dfe8b8e47 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -43,7 +43,6 @@ #include #include -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NODESIZE MAX(L1_CACHE_BYTES, 128) struct btree_geo { diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c index a105e6369efc912cc910aeb932ae0335688ac485..6b62a6bdd50e3ec002ca5ad4bafaeb34401d22c0 100644 --- a/lib/cpumask_kunit.c +++ b/lib/cpumask_kunit.c @@ -152,4 +152,5 @@ static struct kunit_suite test_cpumask_suite = { }; kunit_test_suite(test_cpumask_suite); +MODULE_DESCRIPTION("KUnit tests for cpumask"); MODULE_LICENSE("GPL"); diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 3518e7394eca8e48313f2f8aa21fcc51dad898df..ca736166f10009e9d0fa9f3582acaae3f5d29ec8 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -232,7 +232,8 @@ static int INIT get_next_block(struct bunzip_data *bd) RUNB) */ symCount = symTotal+2; for (j = 0; j < groupCount; j++) { - unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + unsigned char length[MAX_SYMBOLS]; + unsigned short temp[MAX_HUFCODE_BITS+1]; int minLen, maxLen, pp; /* Read Huffman code lengths for each symbol. They're stored in a way similar to mtf; record a starting diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 20a858031f12b7600f906a90584d73aa21e2311e..9d34d35908daa89c74bf4e53864b49e4434e18c5 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -37,7 +37,9 @@ #include +#ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif static long long INIT read_int(unsigned char *ptr, int size) { diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index d3fb09e6eff15c28ae8029501061d1da28cabf66..402e160e718632665368dc3badcf1c97f6effb0e 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -194,4 +194,5 @@ static int __init find_bit_test(void) } module_init(find_bit_test); +MODULE_DESCRIPTION("Test for find_*_bit functions"); MODULE_LICENSE("GPL"); diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8e4e1277411a27c554604686d3ddc..fa692c86f06969239bd501bd30a4723d20e2b291 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -121,6 +121,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index f314a0c15a6d83d8fff4a71b71347fba6da112f2..2abc78367dd110cf8aaaa86190ee9a6248786441 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -668,7 +668,6 @@ DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); static void overflow_allocation_test(struct kunit *test) { - const char device_name[] = "overflow-test"; struct device *dev; int count = 0; @@ -678,7 +677,7 @@ static void overflow_allocation_test(struct kunit *test) } while (0) /* Create dummy device for devm_kmalloc()-family tests. */ - dev = kunit_device_register(test, device_name); + dev = kunit_device_register(test, "overflow-test"); KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), "Cannot register test device\n"); diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6dfb8d46a4ffe9bf88b1192b69047dbfabc3988c..65a75d58ed9e468f856801f02488caaa91240dae 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -1486,4 +1486,5 @@ static void __init selftest(void) KSTM_MODULE_LOADERS(test_bitmap); MODULE_AUTHOR("david decotigny "); +MODULE_DESCRIPTION("Test cases for bitmap API"); MODULE_LICENSE("GPL"); diff --git a/lib/usercopy.c b/lib/usercopy.c index 499a7a7d54db21088e6ced4d124bbd9024dc03ae..7b17b83c8042426f11f0ffc6a37a998bcdcd71ed 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -12,40 +12,18 @@ /* out-of-line parts */ -#ifndef INLINE_COPY_FROM_USER +#if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST) unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long res = n; - might_fault(); - if (!should_fail_usercopy() && likely(access_ok(from, n))) { - /* - * Ensure that bad access_ok() speculation will not - * lead to nasty side effects *after* the copy is - * finished: - */ - barrier_nospec(); - instrument_copy_from_user_before(to, from, n); - res = raw_copy_from_user(to, from, n); - instrument_copy_from_user_after(to, from, n, res); - } - if (unlikely(res)) - memset(to + (n - res), 0, res); - return res; + return _inline_copy_from_user(to, from, n); } EXPORT_SYMBOL(_copy_from_user); #endif -#ifndef INLINE_COPY_TO_USER +#if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST) unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { - might_fault(); - if (should_fail_usercopy()) - return n; - if (likely(access_ok(to, n))) { - instrument_copy_to_user(to, from, n); - n = raw_copy_to_user(to, from, n); - } - return n; + return _inline_copy_to_user(to, from, n); } EXPORT_SYMBOL(_copy_to_user); #endif diff --git a/lib/vsprintf.c b/lib/vsprintf.c index cdd4e2314bfc6ed6800ae4ed7ebcb4e5432b20bb..2d71b11159161b122bceb4131c46ca9ac37f282f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1080,7 +1080,7 @@ char *resource_string(char *buf, char *end, struct resource *res, #define FLAG_BUF_SIZE (2 * sizeof(res->flags)) #define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") #define RAW_BUF_SIZE sizeof("[mem - flags 0x]") - char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + char sym[MAX(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; char *p = sym, *pend = sym + sizeof(sym); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f9696c94e2110b0ab957efae031f5bec1541ad18..67c86a5d64a6a9825092c54ba801c91fa30121db 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -89,9 +89,17 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, bool smaps = tva_flags & TVA_SMAPS; bool in_pf = tva_flags & TVA_IN_PF; bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS; + unsigned long supported_orders; + /* Check the intersection of requested and supported orders. */ - orders &= vma_is_anonymous(vma) ? - THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE; + if (vma_is_anonymous(vma)) + supported_orders = THP_ORDERS_ALL_ANON; + else if (vma_is_dax(vma)) + supported_orders = THP_ORDERS_ALL_FILE_DAX; + else + supported_orders = THP_ORDERS_ALL_FILE_DEFAULT; + + orders &= supported_orders; if (!orders) return 0; @@ -877,7 +885,7 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret, off_sub; - if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) return 0; if (off_end <= off_align || (off_end - off_align) < size) @@ -1677,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1720,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; - } - -out: - if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; + } - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1745,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914c9aacf3e5aab38e144a1eb7d3f40..0c3f56b3578eba44f25870fd8bd6f25ee8bdf9cb 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { + memmap_boot_pages_add(-1); free_bootmem_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1); } else { + memmap_pages_add(-1); __free_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1); } } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); + memmap_pages_add(1); } /* @@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); - if (!page) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); + if (!page) goto out; - } list_add(&page->lru, list); } - - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); + memmap_pages_add(nr_pages); return 0; out: diff --git a/mm/list_lru.c b/mm/list_lru.c index a29d96929d7c7e17138995319ca511e231d1b582..9b7ff06e9d326bb0da9f486da6c05a56c77d786c 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -85,6 +85,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) } #endif /* CONFIG_MEMCG */ +/* The caller must ensure the memcg lifetime. */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -109,14 +110,22 @@ EXPORT_SYMBOL_GPL(list_lru_add); bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_add(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_add(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_add_obj); +/* The caller must ensure the memcg lifetime. */ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -139,11 +148,18 @@ EXPORT_SYMBOL_GPL(list_lru_del); bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_del(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_del(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_del_obj); diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e53b24748b431c16b4b72af22e923..417c96f2da28e8c3984792903d5daed1786a9e2b 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, buf = endp + 1; cfd = simple_strtoul(buf, &endp, 10); - if ((*endp != ' ') && (*endp != '\0')) + if (*endp == '\0') + buf = endp; + else if (*endp == ' ') + buf = endp + 1; + else return -EINVAL; - buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 960371788687720de58cdf146b900d6efb66c04c..f29157288b7dd68a51ebf19f2d361aa66132d675 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3386,11 +3386,28 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) #define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1) static DEFINE_IDR(mem_cgroup_idr); +static DEFINE_SPINLOCK(memcg_idr_lock); + +static int mem_cgroup_alloc_id(void) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&memcg_idr_lock); + ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1, + GFP_NOWAIT); + spin_unlock(&memcg_idr_lock); + idr_preload_end(); + return ret; +} static void mem_cgroup_id_remove(struct mem_cgroup *memcg) { if (memcg->id.id > 0) { + spin_lock(&memcg_idr_lock); idr_remove(&mem_cgroup_idr, memcg->id.id); + spin_unlock(&memcg_idr_lock); + memcg->id.id = 0; } } @@ -3524,8 +3541,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) if (!memcg) return ERR_PTR(error); - memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, - 1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL); + memcg->id.id = mem_cgroup_alloc_id(); if (memcg->id.id < 0) { error = memcg->id.id; goto fail; @@ -3667,7 +3683,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) * publish it here at the end of onlining. This matches the * regular ID destruction during offlining. */ + spin_lock(&memcg_idr_lock); idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + spin_unlock(&memcg_idr_lock); return 0; offline_kmem: diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c911758d09719f457ffa2fd65d953d222..7066fc84f35172a55030addda85eddbb3c9828d8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work) mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void) for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } diff --git a/mm/memory.c b/mm/memory.c index 1ff7b6f51ec1a4a3f35ddb9f93b85720f10102bf..3c01d68065be21a45d8b9085d7f81785bc5cbfd8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4780,7 +4780,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio, { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; - bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE); + bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); pte_t entry; flush_icache_pages(vma, page, nr); @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d57a2ee8585fca9e0a031be090f97f..923ea80ba7442b82c58f4a1680a77562ee154f98 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out: return rc; } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, + enum migrate_mode mode) { int rc; - folio_lock(folio); + if (mode == MIGRATE_ASYNC) { + if (!folio_trylock(folio)) + return -EAGAIN; + } else { + folio_lock(folio); + } rc = split_folio_to_list(folio, split_folios); folio_unlock(folio); if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from, */ if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios) == 0) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from, if (!thp_migration_supported() && is_thp) { nr_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, split_folios)) { + if (!try_split_folio(folio, split_folios, mode)) { stats->nr_thp_split++; stats->nr_split++; continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from, stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (is_large && !nosplit) { - int ret = try_split_folio(folio, split_folios); + int ret = try_split_folio(folio, split_folios, mode); if (!ret) { stats->nr_thp_split += is_thp; diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b934148c97c259f4180e7a4fb2f0c..51960079875baad0fee3005269fe59e2a5f74d68 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; - mod_node_early_perpage_metadata(pgdat->node_id, - DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); @@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); @@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0b92ba80fac5b3459d9bfe0ceb4b4..15bba28acc005fdd8725f4dda8937bbf364433c2 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma) static bool is_madv_discard(int behavior) { - return behavior & - (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | - MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + return true; + } + + return false; } static bool is_ro_anon(struct vm_area_struct *vma) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8337926b89d421997072dcbe07cd5018b75a24ef..c565de8f48e9d8cd10bfb53988373350f9c98ec3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -2343,16 +2343,20 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) static void drain_pages_zone(unsigned int cpu, struct zone *zone) { struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - int count = READ_ONCE(pcp->count); - - while (count) { - int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); - count -= to_drain; + int count; + do { spin_lock(&pcp->lock); - free_pcppages_bulk(zone, to_drain, pcp, 0); + count = pcp->count; + if (count) { + int to_drain = min(count, + pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + + free_pcppages_bulk(zone, to_drain, pcp, 0); + count -= to_drain; + } spin_unlock(&pcp->lock); - } + } while (count); } /* @@ -3068,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3364,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3389,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3446,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -5751,7 +5750,6 @@ void __init setup_per_cpu_pageset(void) for_each_online_pgdat(pgdat) pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - store_early_perpage_metadata(); } __meminit void zone_pcp_init(struct zone *zone) @@ -5815,6 +5813,16 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char return pages; } +void free_reserved_page(struct page *page) +{ + clear_page_tag_ref(page); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + adjust_managed_page_count(page, 1); +} +EXPORT_SYMBOL(free_reserved_page); + static int page_alloc_cpu_dead(unsigned int cpu) { struct zone *zone; @@ -6930,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6958,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7017,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401ba1165de0d89351b3c1c3ac65a1b..641d93f6af4c1e2563aae6885a1e098bf1060bc8 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - DIV_ROUND_UP(table_size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid) else addr = vzalloc_node(size, nid); - if (addr) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(size, PAGE_SIZE)); - } + if (addr) + memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr) { size_t table_size; struct page *page; - struct pglist_data *pgdat; table_size = page_ext_size * PAGES_PER_SECTION; + memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { - page = vmalloc_to_page(addr); - pgdat = page_pgdat(page); vfree(addr); } else { page = virt_to_page(addr); - pgdat = page_pgdat(page); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } - - mod_node_page_state(pgdat, NR_MEMMAP, - -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); - } static void __free_page_ext(unsigned long pfn) diff --git a/mm/shmem.c b/mm/shmem.c index 2faa9daaf54baaf853460fcd424d6bfe5fb1a056..5a77acf6ac6a621dc7b5e7b46402b2b714b45bea 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1629,11 +1629,6 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long mask = READ_ONCE(huge_shmem_orders_always); unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size); unsigned long vm_flags = vma->vm_flags; - /* - * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that - * are enabled for this vma. - */ - unsigned long orders = BIT(PMD_ORDER + 1) - 1; loff_t i_size; int order; @@ -1678,7 +1673,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, if (global_huge) mask |= READ_ONCE(huge_shmem_orders_inherit); - return orders & mask; + return THP_ORDERS_ALL_FILE_DEFAULT & mask; } static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf, @@ -1686,6 +1681,7 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault unsigned long orders) { struct vm_area_struct *vma = vmf->vma; + pgoff_t aligned_index; unsigned long pages; int order; @@ -1697,9 +1693,9 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault order = highest_order(orders); while (orders) { pages = 1UL << order; - index = round_down(index, pages); - if (!xa_find(&mapping->i_pages, &index, - index + pages - 1, XA_PRESENT)) + aligned_index = round_down(index, pages); + if (!xa_find(&mapping->i_pages, &aligned_index, + aligned_index + pages - 1, XA_PRESENT)) break; order = next_order(&orders, order); } diff --git a/mm/slub.c b/mm/slub.c index 3520acaf9afaf356e7bc149a9ca6ea25340d3dec..c9d8a2497fd65e1f48cd933a67bf75ab8f9c117b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4690,6 +4690,9 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) if (!df.slab) continue; + if (kfence_free(df.freelist)) + continue; + do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b0a6aca1620ee96f9ae448349ea85..edcc7a6b0f6f2072e730e45a43a96eefded18302 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) { - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, - PAGE_SIZE)); - } else { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(end - start, PAGE_SIZE)); - } + if (system_state == SYSTEM_BOOTING) + memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); + else + memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d137387c1a18536e494a8f2db8f8c58..0f018c6f9ec52f324237bd9b671f39f3a22e3287 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; #ifndef CONFIG_SPARSEMEM_VMEMMAP - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); #endif } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, - -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); + memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a14323361c53196f8e2a2645dc1142..af2de36549d60807a8bea9ed9e08cc8c389656f5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc6365a971c56882476f3b44d3cf6bd15..e875f2a4915f5f79df8a121270a30896ada043ff 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat, } #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_pages); +} + #ifdef CONFIG_COMPACTION struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", - "nr_memmap", - "nr_memmap_boot", - /* enum writeback_stat_item counters */ + /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", + "nr_memmap_pages", + "nr_memmap_boot_pages", #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ @@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = { #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \ - NR_VM_WRITEBACK_STAT_ITEMS + \ + NR_VM_STAT_ITEMS + \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); - v += NR_VM_WRITEBACK_STAT_ITEMS; + v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); + v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); + v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS all_vm_events(v); @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void) module_init(extfrag_debug_init); #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ - early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ - int nid; - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) { - nid = pgdat->node_id; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - early_perpage_metadata[nid]); - } -} diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5d6581ab7c073d5c1cb933bcd8ef7991d4a77d5f..2d3163e4da96a565f7c2f8bcc6ac122ed64aa2f0 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -120,8 +120,6 @@ #define CLASS_BITS 8 #define MAGIC_VAL_BITS 8 -#define MAX(a, b) ((a) >= (b) ? (a) : (b)) - #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL)) /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8a4ebd93adfcd7572f8e481d99be990efed3f153..06da8ac13dca8ef6737a6500f4ef59c1c4e34666 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -119,13 +119,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: - /* If discovery was not started then it was initiated by the - * MGMT interface so no MGMT event shall be generated either - */ - if (old_state != DISCOVERY_STARTING) { - hdev->discovery.state = old_state; - return; - } mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dce8035ca7990cfbddbe6a8edf7163fcdad7ee7b..d0c118c47f6c9355d3916c4912f54c930fbf2a94 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1721,9 +1721,10 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) switch (enable) { case LE_SCAN_ENABLE: hci_dev_set_flag(hdev, HCI_LE_SCAN); - if (hdev->le_scan_type == LE_SCAN_ACTIVE) + if (hdev->le_scan_type == LE_SCAN_ACTIVE) { clear_pending_adv_report(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + } break; case LE_SCAN_DISABLE: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index cd2ed16da8a466c3f03f99254790a76e041a2c42..e79cd40bd079df3848ebc04efa777b8b4d48373a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2976,6 +2976,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) */ filter_policy = hci_update_accept_list_sync(hdev); + /* If suspended and filter_policy set to 0x00 (no acceptlist) then + * passive scanning cannot be started since that would require the host + * to be woken up to process the reports. + */ + if (hdev->suspended && !filter_policy) { + /* Check if accept list is empty then there is no need to scan + * while suspended. + */ + if (list_empty(&hdev->le_accept_list)) + return 0; + + /* If there are devices is the accept_list that means some + * devices could not be programmed which in non-suspended case + * means filter_policy needs to be set to 0x00 so the host needs + * to filter, but since this is treating suspended case we + * can ignore device needing host to filter to allow devices in + * the acceptlist to be able to wakeup the system. + */ + filter_policy = 0x01; + } + /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support @@ -2998,6 +3019,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; + + /* Disable duplicates filter when scanning for advertisement + * monitor for the following reasons. + * + * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm + * controllers ignore RSSI_Sampling_Period when the duplicates + * filter is enabled. + * + * For SW pattern filtering, when we're not doing interleaved + * scanning, it is necessary to disable duplicates filter, + * otherwise hosts can only receive one advertisement and it's + * impossible to know if a peer is still in range. + */ + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c3c26bbb5ddaebd2c4faa0da887774a32688b71c..9988ba382b686ad8a59c72dcf65749138af0afb5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6774,6 +6774,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { + l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9a1cb5079a7a0758b241f8425ea4153045e72d07..b2ae0d2434d2ebb143ff9005aef033b40bde5e2f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - HLIST_HEAD(deleted_head); struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_find_del_pg(br, pg); - hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); - br_multicast_gc(&deleted_head); + flush_work(&br->mcast_gc_work); br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 09f6a773a7080a48ab074bdff185cf30149ff2d9..8f9c19d992ac5ce333f8d59ff7628a2335f8cdfc 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv, if (likely(nf_ct_is_confirmed(ct))) return NF_ACCEPT; + if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + WARN_ON_ONCE(skb_shared(skb)); - WARN_ON_ONCE(refcount_read(&nfct->use) != 1); /* We can't call nf_confirm here, it would create a dependency * on nf_conntrack module. diff --git a/net/core/dev.c b/net/core/dev.c index 6ea1d20676fbe9da53c1b7491fc87202bb100649..f66e614078832842e903fc2ee792cd89b8a3572f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5150,6 +5150,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) bpf_net_ctx_clear(bpf_net_ctx); return XDP_DROP; } + bpf_net_ctx_clear(bpf_net_ctx); } return XDP_PASS; out_redir: @@ -9911,6 +9912,15 @@ static void netdev_sync_lower_features(struct net_device *upper, } } +static bool netdev_has_ip_or_hw_csum(netdev_features_t features) +{ + netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + bool ip_csum = (features & ip_csum_mask) == ip_csum_mask; + bool hw_csum = features & NETIF_F_HW_CSUM; + + return ip_csum || hw_csum; +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -9992,15 +10002,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_LRO; } - if (features & NETIF_F_HW_TLS_TX) { - bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - bool hw_csum = features & NETIF_F_HW_CSUM; - - if (!ip_csum && !hw_csum) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; - } + if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; } if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { @@ -10008,6 +10012,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_HW_TLS_RX; } + if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n"); + features &= ~NETIF_F_GSO_UDP_L4; + } + return features; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8ec35194bfcb8574f53a9fd28f0cb2ebfe9a3f2e..ab150641142aa1545c71fc5d3b11db33c70cf437 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -148,9 +148,9 @@ static void linkwatch_schedule_work(int urgent) * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) - mod_delayed_work(system_wq, &linkwatch_work, 0); + mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else - schedule_delayed_work(&linkwatch_work, delay); + queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 87e67194f24046a8420bbb51c19fb0a686b9b06b..73fd7f543fd0954ba3f4a2291675047831970024 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3288,7 +3288,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(tgt_net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index ae4b4b28a6014f5b18cce788a14b0e8d78224084..655ff5224ffa30ff721998f38e2b22b1545c68fa 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -35,7 +35,10 @@ struct cmis_cdb_fw_mng_features_rpl { __be16 resv7; }; -#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01 +enum cmis_cdb_fw_write_mechanism { + CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01, + CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, +}; static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, @@ -64,7 +67,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, } rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; - if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) { + if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL || + rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) { ethnl_module_fw_flash_ntf_err(dev, ntf_params, "Write LPL is not supported", NULL); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 983fee76f5cf15e6b9142e32c632b175d6a8df5e..e18823bf23306dffd32ddc752458e3c350a29b42 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1331,13 +1331,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; + u32 user_indir_len = 0, indir_bytes = 0; struct ethtool_rxfh_param rxfh_dev = {}; struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; bool locked = false; /* dev->ethtool->rss_lock taken */ - u32 indir_bytes = 0; bool create = false; u8 *rss_config; int ret; @@ -1369,23 +1369,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key, function - * or input transformation. - */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.key_size && rxfh.key_size != dev_key_size)) + return -EINVAL; + + /* Must request at least one change: indir size, hash key, function + * or input transformation. + * There's no need for any of it in case of context creation. + */ + if (!create && (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; - if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); + indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); - rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); + rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER); if (!rss_config) return -ENOMEM; @@ -1400,6 +1402,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, */ if (rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { + user_indir_len = indir_bytes; rxfh_dev.indir = (u32 *)rss_config; rxfh_dev.indir_size = dev_indir_size; ret = ethtool_copy_validate_indir(rxfh_dev.indir, @@ -1426,7 +1429,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.key_size = dev_key_size; rxfh_dev.key = rss_config + indir_bytes; if (copy_from_user(rxfh_dev.key, - useraddr + rss_cfg_offset + indir_bytes, + useraddr + rss_cfg_offset + user_indir_len, rxfh.key_size)) { ret = -EFAULT; goto out; @@ -1449,12 +1452,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; u32 ctx_id; /* driver uses new API, core allocates ID */ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + XA_LIMIT(1, limit - 1), + GFP_KERNEL_ACCOUNT); if (ret < 0) { kfree(ctx); goto out; @@ -1474,16 +1478,21 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.input_xfrm = rxfh.input_xfrm; if (rxfh.rss_context && ops->create_rxfh_context) { - if (create) + if (create) { ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); - else if (rxfh_dev.rss_delete) + /* Make sure driver populates defaults */ + WARN_ON_ONCE(!ret && !rxfh_dev.key && + !memchr_inv(ethtool_rxfh_context_key(ctx), + 0, ctx->key_size)); + } else if (rxfh_dev.rss_delete) { ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context, extack); - else + } else { ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack); + } } else { ret = ops->set_rxfh(dev, &rxfh_dev, extack); } @@ -1522,6 +1531,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, kfree(ctx); goto out; } + + /* Fetch the defaults for the old API, in the new API drivers + * should write defaults into ctx themselves. + */ + rxfh_dev.indir = (u32 *)rss_config; + rxfh_dev.indir_size = dev_indir_size; + + rxfh_dev.key = rss_config + indir_bytes; + rxfh_dev.key_size = dev_key_size; + + ret = ops->get_rxfh(dev, &rxfh_dev); + if (WARN_ON(ret)) { + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + kfree(ctx); + goto out; + } } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); @@ -1530,12 +1555,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh_dev.indir) { for (i = 0; i < dev_indir_size; i++) ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; - ctx->indir_configured = 1; + ctx->indir_configured = + rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; } if (rxfh_dev.key) { memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, dev_key_size); - ctx->key_configured = 1; + ctx->key_configured = !!rxfh.key_size; } if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) ctx->hfunc = rxfh_dev.hfunc; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 71679137eff21820a88c25dd75a7bf115e899154..5c4c4505ab9a4a4497e6cfd669e3506c95511c81 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -111,7 +111,8 @@ rss_reply_size(const struct ethnl_req_info *req_base, const struct rss_reply_data *data = RSS_REPDATA(reply_base); int len; - len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */ + nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ @@ -124,6 +125,11 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); + struct rss_req_info *request = RSS_REQINFO(req_base); + + if (request->rss_context && + nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context)) + return -EMSGSIZE; if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 4d42d0756fd70c980dac9620b1d778c6721ee28d..a5db7c67d61befe2a064c82051ff8ac7d65ac590 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = { static int __init iptable_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv4_table, - iptable_nat_table_init); + int ret; + /* net->gen->ptr[iptable_nat_net_id] must be allocated + * before calling iptable_nat_table_init(). + */ + ret = register_pernet_subsys(&iptable_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&iptable_nat_net_ops); - if (ret < 0) { - xt_unregister_template(&nf_nat_ipv4_table); - return ret; - } + ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + if (ret < 0) + unregister_pernet_subsys(&iptable_nat_net_ops); return ret; } static void __exit iptable_nat_exit(void) { - unregister_pernet_subsys(&iptable_nat_net_ops); xt_unregister_template(&nf_nat_ipv4_table); + unregister_pernet_subsys(&iptable_nat_net_ops); } module_init(iptable_nat_init); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6c4664c681ca530d67b9ccc54f11cbb178d32c8e..40053a02bae103a9e1617ab0457da6398d75cd85 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -44,7 +44,7 @@ #include #include -#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) +#define TCPUDP_MIB_MAX MAX_T(u32, UDP_MIB_MAX, TCP_MIB_MAX) /* * Report socket allocation statistics [mea@utu.fi] diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 85531437890cee662a2b63c2fbfdae73d6285642..db6516092daf5b180fb75482fb711f226451a647 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -267,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head) kfree_sensitive(key); } -void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +static void tcp_ao_info_free_rcu(struct rcu_head *head) { - struct tcp_ao_info *ao; + struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu); struct tcp_ao_key *key; struct hlist_node *n; + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); + } + kfree(ao); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao) +{ + size_t total_ao_sk_mem = 0; + struct tcp_ao_key *key; + + hlist_for_each_entry(key, &ao->head, node) + total_ao_sk_mem += tcp_ao_sizeof_key(key); + atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + if (twsk) { ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); - tcp_twsk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL); } else { ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); - tcp_sk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL); } if (!ao || !refcount_dec_and_test(&ao->refcnt)) return; - hlist_for_each_entry_safe(key, n, &ao->head, node) { - hlist_del_rcu(&key->node); - if (!twsk) - atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); - call_rcu(&key->rcu, tcp_ao_key_free_rcu); - } - - kfree_rcu(ao, rcu); - static_branch_slow_dec_deferred(&tcp_ao_needed); + if (!twsk) + tcp_ao_sk_omem_free(sk, ao); + call_rcu(&ao->rcu, tcp_ao_info_free_rcu); } void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 454362e359da56c83a00a2523ca09c61c563608e..e37488d3453f03b68928718a69559581b436812b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; + + if (old_ratio != tcp_sk(sk)->scaling_ratio) + WRITE_ONCE(tcp_sk(sk)->window_clamp, + tcp_win_from_space(sk, sk->sk_rcvbuf)); } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4b791e74529e157ead943e4faa5b3b90869c7273..e4ad3311e14895ab898a653c3a29cd8af3e42dff 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (thlen < sizeof(*th)) goto out; + if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) + goto out; + if (!pskb_may_pull(skb, thlen)) goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index aa2e0a28ca61388637616b2860da12e6d144e6dd..b254a5dadfcf3f42e3ebb3aa35688a5619d9b4e0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -278,6 +278,16 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); + if (unlikely(skb_checksum_start(gso_skb) != + skb_transport_header(gso_skb))) + return ERR_PTR(-EINVAL); + + /* We don't know if egress device can segment and checksum the packet + * when IPv6 extension headers are present. Fall back to software GSO. + */ + if (gso_skb->ip_summed != CHECKSUM_PARTIAL) + features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 70a0b2ad6bd70c2d5152ab59d2834a63587e6652..b8eec1b6cc2c88bfbf66fec165a6a8782ffd8242 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { + bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; @@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, break; #endif default: - if (ndisc_is_useropt(dev, nd_opt)) { - ndopts->nd_useropts_end = nd_opt; - if (!ndopts->nd_useropts) - ndopts->nd_useropts = nd_opt; - } else { - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the - * protocol. - */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); - } + unknown = true; + } + if (ndisc_is_useropt(dev, nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else if (unknown) { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK(2, notice, + "%s: ignored unsupported option; type=%d, len=%d\n", + __func__, + nd_opt->nd_opt_type, + nd_opt->nd_opt_len); } next_opt: opt_len -= l; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 52cf104e347881f052f1d515e78f05815bf7f45b..e119d4f090cc88d91a81a73be40c76d71e948018 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv6_table, - ip6table_nat_table_init); + int ret; + /* net->gen->ptr[ip6table_nat_net_id] must be allocated + * before calling ip6t_nat_register_lookups(). + */ + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&ip6table_nat_net_ops); + ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); if (ret) - xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); return ret; } static void __exit ip6table_nat_exit(void) { - unregister_pernet_subsys(&ip6table_nat_net_ops); xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); } module_init(ip6table_nat_init); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f0844c9315d1819c0b1c48dd767148fca244b81..4120e67a8ce6bb44e3396ec6f66e564aae97aeec 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, }; struct inet_frag_queue *q; + if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))) + key.iif = 0; + q = inet_frag_find(nf_frag->fqdir, &key); if (!q) return NULL; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 6d1d9221649d52720d2e6a0a6d310b3c82102f10..752327b10dde74e4ee6b17e8d1a85923931e761f 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -27,7 +27,7 @@ #include #define MAX4(a, b, c, d) \ - max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) + MAX_T(u32, MAX_T(u32, a, b), MAX_T(u32, c, d)) #define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ IPSTATS_MIB_MAX, ICMP_MIB_MAX) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c3b0b610b0aa35e9afbd74148efc6e8bb63b2822..c00323fa9eb66ed71eddf7792773b8a028b33b74 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_path *path = iucv->path; - if (iucv->path) { - iucv->path = NULL; + /* Whoever resets the path pointer, must sever and free it. */ + if (xchg(&iucv->path, NULL)) { if (with_user_data) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c80ab3f26084fb77b37f321d3bade521be66848b..2e86f520f799410aa946e0389a84fb58714941dd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -86,6 +86,11 @@ /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 +#define L2TP_DEPTH_NESTING 2 +#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "L2TP requires its own lockdep subclass" +#endif + /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { @@ -1124,7 +1129,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); nf_reset_ct(skb); - bh_lock_sock_nested(sk); + /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by + * nested socket calls on the same lockdep socket class. This can + * happen when data from a user socket is routed over l2tp, which uses + * another userspace socket. + */ + spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); + if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; @@ -1176,7 +1187,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); out_unlock: - bh_unlock_sock(sk); + spin_unlock(&sk->sk_lock.slock); return ret; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 85cb71de370f297fa948c590729e0c7764fb25b3..b02b84ce2130761354829ee17b5ea9d85f8d1f81 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -114,7 +114,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, /* apply all changes now - no failures allowed */ - if (monitor_sdata) + if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { @@ -3053,6 +3053,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return -EOPNOTSUPP; + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) @@ -3115,7 +3118,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 72a9ba8bc5fd9764068e00d42755f3ba922418de..edba4a31844fb6251f35e249ea59ea74e951de2f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1768,7 +1768,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } sdata = rcu_dereference(local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; @@ -3957,7 +3957,8 @@ begin: break; } tx.sdata = rcu_dereference(local->monitor_sdata); - if (tx.sdata) { + if (tx.sdata && + ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &tx.sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ced19ce7c51a33198dbec12362318040dc82a597..c7ad9bc5973a0e9076e3d8fc7b594d63d842feed 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -776,7 +776,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, sdata = rcu_dereference_check(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx) || lockdep_is_held(&local->hw.wiphy->mtx)); - if (sdata && + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only || sdata->flags & IEEE80211_SDATA_IN_DRIVER)) iterator(data, sdata->vif.addr, &sdata->vif); diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 3ae46b545d2c2b7b7bf55ea9b101bcce798489f9..2d3efb405437d85c0bca70d7a92ca3a7363365e1 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -94,7 +94,7 @@ static size_t subflow_get_info_size(const struct sock *sk) nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ - nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index c30405e7683370d3e3f422ac9a7b0c625348b1ae..7884217f33eb2f75ea76086c7f7fbea7106b0c1a 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), + SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), + SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2704afd0dfe45249b1789d1c31732a982eafee65..66aa67f49d032c75e2821ed69be24b41e30a956a 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -14,7 +14,9 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */ + MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8e8dcfbc299382454cf715a917c0f876ad5c35d4..ac2f1a54cc43ae4de33812357f82aef6b9b4aae0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; - opts->backup = subflow_req->backup; + opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; @@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && + (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 55406720c607119b0502fe6918bbbcf19a0a355e..23bb89c94e90d661343a1e00f62e83c60578cac5 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -426,6 +426,18 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, &skc_local); } +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) +{ + struct mptcp_addr_info skc_local; + + mptcp_local_address((struct sock_common *)skc, &skc_local); + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_is_backup(msk, &skc_local); + + return mptcp_pm_nl_is_backup(msk, &skc_local); +} + int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ea9e5817b9e9d3dea5901798f5fee98690a28e14..4cae2aa7be5cbcee6b24c3488792558d928c8180 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -348,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { - if (mptcp_pm_is_kernel(msk)) + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; sk_reset_timer(sk, &add_entry->add_timer, @@ -471,7 +471,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con slow = lock_sock_fast(ssk); if (prio) { subflow->send_mp_prio = 1; - subflow->backup = backup; subflow->request_bkup = backup; } @@ -513,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *local; unsigned int add_addr_signal_max; unsigned int local_addr_max; struct pm_nl_pernet *pernet; @@ -556,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { - local = select_signal_address(pernet, msk); - /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -568,16 +565,26 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - if (local) { - if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); - mptcp_pm_nl_addr_send_ack(msk); - } - } + local = select_signal_address(pernet, msk); + if (!local) + goto subflow; + + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. + */ + if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + return; + + __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); + + if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = local; } +subflow: /* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max && msk->pm.subflows < subflows_max) { @@ -585,9 +592,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) bool fullmesh; int i, nr; - local = select_local_address(pernet, msk); - if (!local) - break; + if (signal_and_subflow) { + local = signal_and_subflow; + signal_and_subflow = NULL; + } else { + local = select_local_address(pernet, msk); + if (!local) + break; + } fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); @@ -1102,6 +1114,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc return ret; } +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + rcu_read_unlock(); + + return backup; +} + #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1311,8 +1341,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + if (addr.addr.port && !address_use_port(&addr)) { + GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); return -EINVAL; } @@ -1401,6 +1431,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= ret; mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } @@ -1534,16 +1565,25 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; + int anno_nr = 0; list_for_each_entry(entry, rm_list, list) { - if ((remove_anno_list_by_saddr(msk, &entry->addr) || - lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && - alist.nr < MPTCP_RM_IDS_MAX) - alist.ids[alist.nr++] = entry->addr.id; + if (alist.nr >= MPTCP_RM_IDS_MAX) + break; + + /* only delete if either announced or matching a subflow */ + if (remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!lookup_subflow_by_saddr(&msk->conn_list, + &entry->addr)) + continue; + + alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } @@ -1556,17 +1596,18 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && - slist.nr < MPTCP_RM_IDS_MAX) + if (slist.nr < MPTCP_RM_IDS_MAX && + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = entry->addr.id; - if (remove_anno_list_by_saddr(msk, &entry->addr) && - alist.nr < MPTCP_RM_IDS_MAX) + if (alist.nr < MPTCP_RM_IDS_MAX && + remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index f0a4590506c69a13123ecc041ce1724bc36d95e2..8eaa9fbe3e343be193c2b7fd0b73cba3a4227525 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -165,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, false)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + spin_unlock_bh(&msk->pm.lock); + + return backup; +} + int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a26c2c840fd9ca6e1bd90aa57e0a11c122534eca..0d536b183a6c559fc932dd5ececb1482e195ee68 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -350,8 +350,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) + if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; + } has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -844,10 +846,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) sk_rbuf = ssk_rbuf; /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); + if (__mptcp_rmem(sk) > sk_rbuf) return; - } /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); @@ -1422,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } mptcp_for_each_subflow(msk, subflow) { + bool backup = subflow->backup || subflow->request_bkup; + trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; tout = max(tout, mptcp_timeout_from_subflow(subflow)); - nr_active += !subflow->backup; + nr_active += !backup; pace = subflow->avg_pacing_rate; if (unlikely(!pace)) { /* init pacing rate from socket */ @@ -1439,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); - if (linger_time < send_info[subflow->backup].linger_time) { - send_info[subflow->backup].ssk = ssk; - send_info[subflow->backup].linger_time = linger_time; + if (linger_time < send_info[backup].linger_time) { + send_info[backup].ssk = ssk; + send_info[backup].linger_time = linger_time; } } __mptcp_set_timeout(sk, tout); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b11a4e50d52b6f1950660654eee022d387b6dc4b..60c6b073d65fe5f21bc3b8a16d1df7ab7f41d2cb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -448,6 +448,7 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, + request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; @@ -1108,6 +1109,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 39e2cbdf38019cc99e31e54ac221dbc7ef825110..a21c712350c36eaad55b7d9148f5dbaad53d19a6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -100,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) return NULL; } subflow_req->local_id = local_id; + subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req); return msk; } @@ -168,6 +169,9 @@ static int subflow_check_req(struct request_sock *req, return 0; } else if (opt_mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); + + if (mp_opt.backup) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); } if (opt_mp_capable && listener->request_mptcp) { @@ -577,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); + if (subflow->backup) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); + if (subflow_use_different_dport(msk, sk)) { pr_debug("synack inet_dport=%d %d", ntohs(inet_sk(sk)->inet_dport), @@ -614,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk) return err; subflow_set_local_id(subflow, err); + subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk); + return 0; } @@ -1221,14 +1230,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - u32 incr; + struct tcp_sock *tp = tcp_sk(ssk); + u32 offset, incr, avail_len; - incr = limit >= skb->len ? skb->len + fin : limit; + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; + if (WARN_ON_ONCE(offset > skb->len)) + goto out; - pr_debug("discarding=%d len=%d seq=%d", incr, skb->len, - subflow->map_subflow_seq); + avail_len = skb->len - offset; + incr = limit >= avail_len ? avail_len + fin : limit; + + pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; + +out: if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq)) sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) @@ -2005,6 +2022,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; + new_ctx->request_bkup = subflow_req->request_bkup; WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index ff1a4e36c2b5dcf9d12d0f65ff98277f56a17beb..e06bc36f49fe74e89c4074eb8b32409bc7035e8c 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; + struct netlink_ext_ack extack = {}; struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; int err, i = 0; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 481ee78e77bcf96d2271b465b259554f83df33b0..0a2f79346958965833c7636f436f297c5afc7a53 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8020,6 +8020,19 @@ cont: return skb->len; } +static int nf_tables_dumpreset_obj(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_obj(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8036,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - ctx->reset = true; - return 0; } +static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) +{ + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_obj_start(cb); +} + static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8052,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) } /* called with rcu_read_lock held */ -static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); @@ -8062,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_object *obj; struct sk_buff *skb2; - bool reset = false; u32 objtype; int err; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start = nf_tables_dump_obj_start, - .dump = nf_tables_dump_obj, - .done = nf_tables_dump_obj_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - if (!nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_TYPE]) - return -EINVAL; + return ERR_PTR(-EINVAL); table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); - return PTR_ERR(table); + return ERR_CAST(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); - return PTR_ERR(obj); + return ERR_CAST(obj); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; + err = nf_tables_fill_obj_info(skb2, net, portid, + info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } - if (reset) { - const struct nftables_pernet *nft_net; - char *buf; + return skb2; +} - nft_net = nft_pernet(net); - buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); +static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + u32 portid = NETLINK_CB(skb).portid; + struct sk_buff *skb2; - audit_log_nfcfg(buf, - family, - 1, - AUDIT_NFT_OP_OBJ_RESET, - GFP_ATOMIC); - kfree(buf); + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_obj_start, + .dump = nf_tables_dump_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, - info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, - family, table, obj, reset); - if (err < 0) - goto err_fill_obj_info; + skb2 = nf_tables_getobj_single(portid, info, nla, false); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, info->net, portid); +} -err_fill_obj_info: - kfree_skb(skb2); - return err; +static int nf_tables_getobj_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dumpreset_obj_start, + .dump = nf_tables_dumpreset_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getobj_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_OBJ_TABLE]), + (char *)nla_data(nla[NFTA_OBJ_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); + kfree(buf); + + return nfnetlink_unicast(skb2, net, portid); } static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) @@ -9410,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj, + .call = nf_tables_getobj_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4abf660c7baff01a361edaa8392edf2fac48f340..932b3ddb34f13c44333b32abd534b12f1d12955d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -427,8 +427,10 @@ replay_abort: nfnl_unlock(subsys_id); - if (nlh->nlmsg_flags & NLM_F_ACK) + if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); + } while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -577,6 +579,7 @@ done: ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } else if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } } else { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 55e28e1da66ec6e6e12a6ffcd5d058fc6d590520..e0716da256bf55ecb8fb275f8a36071c35f1e93f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; - const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + struct nf_conn *ct = (void *)skb_nfct(entry->skb); + unsigned long status; + unsigned int use; - if (ct && ((ct->status & flags) == IPS_DYING)) + if (!ct) + return false; + + status = READ_ONCE(ct->status); + if ((status & flags) == IPS_DYING) return true; + + if (status & IPS_CONFIRMED) + return false; + + /* in some cases skb_clone() can occur after initial conntrack + * pickup, but conntrack assumes exclusive skb->_nfct ownership for + * unconfirmed entries. + * + * This happens for br_netfilter and with ip multicast routing. + * We can't be solved with serialization here because one clone could + * have been queued for local delivery. + */ + use = refcount_read(&ct->ct_general.use); + if (likely(use == 1)) + return false; + + /* Can't decrement further? Exclusive ownership. */ + if (!refcount_dec_not_one(&ct->ct_general.use)) + return false; + + skb_set_nfct(entry->skb, 0); + /* No nf_ct_put(): we already decremented .use and it cannot + * drop down to 0. + */ + return true; #endif return false; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 113b907da0f757af7be920cc9b3a1b1c769f5804..3ba8e7e739b58a96e66ca64d38bff758500df3e1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -44,6 +44,8 @@ static DEFINE_MUTEX(zones_mutex); struct zones_ht_key { struct net *net; u16 zone; + /* Note : pad[] must be the last field. */ + u8 pad[]; }; struct tcf_ct_flow_table { @@ -60,7 +62,7 @@ struct tcf_ct_flow_table { static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), .key_offset = offsetof(struct tcf_ct_flow_table, key), - .key_len = sizeof_field(struct tcf_ct_flow_table, key), + .key_len = offsetof(struct zones_ht_key, pad), .automatic_shrinking = true, }; diff --git a/net/sctp/input.c b/net/sctp/input.c index 17fcaa9b0df9452bbfe7c3bb4b2d300e6ca6ce40..a8a254a5008e529a3baf2418f784290188933aee 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; + int err = 0; ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); struct sctp_endpoint *ep2; struct list_head *list; - int cnt = 0, err = 1; + int cnt = 0; + + err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; @@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) if (!err) { err = reuseport_add_sock(sk, sk2, any); if (err) - return err; + goto out; break; } else if (err < 0) { - return err; + goto out; } } if (err) { err = reuseport_alloc(sk, any); if (err) - return err; + goto out; } } - write_lock(&head->lock); hlist_add_head(&ep->node, &head->chain); +out: write_unlock(&head->lock); - return 0; + return err; } /* Add an endpoint to the hash. Local BH-safe. */ @@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - - write_lock(&head->lock); hlist_del_init(&ep->node); write_unlock(&head->lock); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 73a875573e7ad5b7a95f7941e33f0d784a91d16d..8e3093938cd2268ab20979786d28fa79fe64853f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3319,10 +3319,8 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &smc->clcsock); - if (rc) { - sk_common_release(sk); + if (rc) return rc; - } /* smc_clcsock_release() does not wait smc->clcsock->sk's * destruction; its sk_state might not be TCP_CLOSE after @@ -3368,6 +3366,9 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc->clcsock = clcsock; else rc = smc_create_clcsk(net, sk, family); + + if (rc) + sk_common_release(sk); out: return rc; } diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index 9d32058db2b5d62ca122d1ead0299427575dda35..e19177ce409230212f0e71046038f2e1138862c7 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -19,7 +19,7 @@ #include "smc_clc.h" -#define SMC_MAX_FBACK_RSN_CNT 30 +#define SMC_MAX_FBACK_RSN_CNT 36 enum { SMC_BUF_8K, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e03f14024e471d3104b08ca99375ab6d871577a5..88a59cfa5583c064d02df1b3632cb6021a24390d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -161,7 +161,7 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) str[len] = '\n'; str[len + 1] = '\0'; - return sysfs_emit(buf, str); + return sysfs_emit(buf, "%s", str); } module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4b040285aa78c061d636e1a0a75a59fbe931333b..0ff9b2dd86bac261fe3d787e0f94eaa3a0c456c9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1270,25 +1270,28 @@ out: return err; } +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + return vsk->transport->dgram_dequeue(vsk, msg, len, flags); +} + int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { #ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; const struct proto *prot; -#endif - struct vsock_sock *vsk; - struct sock *sk; - sk = sock->sk; - vsk = vsock_sk(sk); - -#ifdef CONFIG_BPF_SYSCALL prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) return prot->recvmsg(sk, msg, len, flags, NULL); #endif - return vsk->transport->dgram_dequeue(vsk, msg, len, flags); + return __vsock_dgram_recvmsg(sock, msg, len, flags); } EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); @@ -2174,15 +2177,12 @@ out: } int -vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; -#ifdef CONFIG_BPF_SYSCALL - const struct proto *prot; -#endif int err; sk = sock->sk; @@ -2233,14 +2233,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto out; } -#ifdef CONFIG_BPF_SYSCALL - prot = READ_ONCE(sk->sk_prot); - if (prot != &vsock_proto) { - release_sock(sk); - return prot->recvmsg(sk, msg, len, flags, NULL); - } -#endif - if (sk->sk_type == SOCK_STREAM) err = __vsock_stream_recvmsg(sk, msg, len, flags); else @@ -2250,6 +2242,22 @@ out: release_sock(sk); return err; } + +int +vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ +#ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; + const struct proto *prot; + + prot = READ_ONCE(sk->sk_prot); + if (prot != &vsock_proto) + return prot->recvmsg(sk, msg, len, flags, NULL); +#endif + + return __vsock_connectible_recvmsg(sock, msg, len, flags); +} EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); static int vsock_set_rcvlowat(struct sock *sk, int val) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index a3c97546ab84a620263ed17fafaf94910d416f07..c42c5cc18f324108e044772e957c8d42c92ead8c 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -64,9 +64,9 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int int err; if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) - err = vsock_connectible_recvmsg(sock, msg, len, flags); + err = __vsock_connectible_recvmsg(sock, msg, len, flags); else if (sk->sk_type == SOCK_DGRAM) - err = vsock_dgram_recvmsg(sock, msg, len, flags); + err = __vsock_dgram_recvmsg(sock, msg, len, flags); else err = -EPROTOTYPE; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d99319d82205496636ca11a72837afbca5074fa0..64eeed82d43d5c90423a398464122eb41784e168 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3178,8 +3178,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { - size_t min_hdr_len = offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); + size_t min_hdr_len; struct ieee80211_ext *ext = NULL; enum cfg80211_bss_frame_type ftype; u16 beacon_interval; @@ -3202,10 +3201,16 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; - min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon); if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_short_beacon.variable); + else + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_beacon.variable); + } else { + /* same for beacons */ + min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); } if (WARN_ON(len < min_hdr_len)) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index e419aa8c4a5ac64ea28476e99c08c0a982e9914c..d9d7bf8bb5c1acde38f1263da3a371d4788c8641 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1045,6 +1045,7 @@ void cfg80211_connect_done(struct net_device *dev, cfg80211_hold_bss( bss_from_pub(params->links[link].bss)); ev->cr.links[link].bss = params->links[link].bss; + ev->cr.links[link].status = params->links[link].status; if (params->links[link].addr) { ev->cr.links[link].addr = next; diff --git a/rust/Makefile b/rust/Makefile index 83f675adbfabd3a38272d7f7ac49daed9ef178c1..8de3ebba95512c320448299b6f7a38f60dd60355 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -44,17 +44,10 @@ rustc_sysroot := $(shell MAKEFLAGS= $(RUSTC) $(rust_flags) --print sysroot) rustc_host_target := $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2) RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library -ifeq ($(quiet),silent_) -cargo_quiet=-q +ifneq ($(quiet),) rust_test_quiet=-q rustdoc_test_quiet=--test-args -q rustdoc_test_kernel_quiet=>/dev/null -else ifeq ($(quiet),quiet_) -rust_test_quiet=-q -rustdoc_test_quiet=--test-args -q -rustdoc_test_kernel_quiet=>/dev/null -else -cargo_quiet=--verbose endif core-cfgs = \ @@ -135,22 +128,21 @@ quiet_cmd_rustc_test_library = RUSTC TL $< @$(objtree)/include/generated/rustc_cfg $(rustc_target_flags) \ --crate-type $(if $(rustc_test_library_proc),proc-macro,rlib) \ --out-dir $(objtree)/$(obj)/test --cfg testlib \ - --sysroot $(objtree)/$(obj)/test/sysroot \ -L$(objtree)/$(obj)/test \ --crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $< -rusttestlib-build_error: $(src)/build_error.rs rusttest-prepare FORCE +rusttestlib-build_error: $(src)/build_error.rs FORCE +$(call if_changed,rustc_test_library) rusttestlib-macros: private rustc_target_flags = --extern proc_macro rusttestlib-macros: private rustc_test_library_proc = yes -rusttestlib-macros: $(src)/macros/lib.rs rusttest-prepare FORCE +rusttestlib-macros: $(src)/macros/lib.rs FORCE +$(call if_changed,rustc_test_library) -rusttestlib-bindings: $(src)/bindings/lib.rs rusttest-prepare FORCE +rusttestlib-bindings: $(src)/bindings/lib.rs FORCE +$(call if_changed,rustc_test_library) -rusttestlib-uapi: $(src)/uapi/lib.rs rusttest-prepare FORCE +rusttestlib-uapi: $(src)/uapi/lib.rs FORCE +$(call if_changed,rustc_test_library) quiet_cmd_rustdoc_test = RUSTDOC T $< @@ -159,7 +151,7 @@ quiet_cmd_rustdoc_test = RUSTDOC T $< $(RUSTDOC) --test $(rust_common_flags) \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) $(rustdoc_test_target_flags) \ - --sysroot $(objtree)/$(obj)/test/sysroot $(rustdoc_test_quiet) \ + $(rustdoc_test_quiet) \ -L$(objtree)/$(obj)/test --output $(rustdoc_output) \ --crate-name $(subst rusttest-,,$@) $< @@ -192,7 +184,6 @@ quiet_cmd_rustc_test = RUSTC T $< $(RUSTC) --test $(rust_common_flags) \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) --out-dir $(objtree)/$(obj)/test \ - --sysroot $(objtree)/$(obj)/test/sysroot \ -L$(objtree)/$(obj)/test \ --crate-name $(subst rusttest-,,$@) $<; \ $(objtree)/$(obj)/test/$(subst rusttest-,,$@) $(rust_test_quiet) \ @@ -200,60 +191,15 @@ quiet_cmd_rustc_test = RUSTC T $< rusttest: rusttest-macros rusttest-kernel -# This prepares a custom sysroot with our custom `alloc` instead of -# the standard one. -# -# This requires several hacks: -# - Unlike `core` and `alloc`, `std` depends on more than a dozen crates, -# including third-party crates that need to be downloaded, plus custom -# `build.rs` steps. Thus hardcoding things here is not maintainable. -# - `cargo` knows how to build the standard library, but it is an unstable -# feature so far (`-Zbuild-std`). -# - `cargo` only considers the use case of building the standard library -# to use it in a given package. Thus we need to create a dummy package -# and pick the generated libraries from there. -# - The usual ways of modifying the dependency graph in `cargo` do not seem -# to apply for the `-Zbuild-std` steps, thus we have to mislead it -# by modifying the sources in the sysroot. -# - To avoid messing with the user's Rust installation, we create a clone -# of the sysroot. However, `cargo` ignores `RUSTFLAGS` in the `-Zbuild-std` -# steps, thus we use a wrapper binary passed via `RUSTC` to pass the flag. -# -# In the future, we hope to avoid the whole ordeal by either: -# - Making the `test` crate not depend on `std` (either improving upstream -# or having our own custom crate). -# - Making the tests run in kernel space (requires the previous point). -# - Making `std` and friends be more like a "normal" crate, so that -# `-Zbuild-std` and related hacks are not needed. -quiet_cmd_rustsysroot = RUSTSYSROOT - cmd_rustsysroot = \ - rm -rf $(objtree)/$(obj)/test; \ - mkdir -p $(objtree)/$(obj)/test; \ - cp -a $(rustc_sysroot) $(objtree)/$(obj)/test/sysroot; \ - echo '\#!/bin/sh' > $(objtree)/$(obj)/test/rustc_sysroot; \ - echo "$(RUSTC) --sysroot=$(abspath $(objtree)/$(obj)/test/sysroot) \"\$$@\"" \ - >> $(objtree)/$(obj)/test/rustc_sysroot; \ - chmod u+x $(objtree)/$(obj)/test/rustc_sysroot; \ - $(CARGO) -q new $(objtree)/$(obj)/test/dummy; \ - RUSTC=$(objtree)/$(obj)/test/rustc_sysroot $(CARGO) $(cargo_quiet) \ - test -Zbuild-std --target $(rustc_host_target) \ - --manifest-path $(objtree)/$(obj)/test/dummy/Cargo.toml; \ - rm $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib/*; \ - cp $(objtree)/$(obj)/test/dummy/target/$(rustc_host_target)/debug/deps/* \ - $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib - -rusttest-prepare: FORCE - +$(call if_changed,rustsysroot) - rusttest-macros: private rustc_target_flags = --extern proc_macro rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro -rusttest-macros: $(src)/macros/lib.rs rusttest-prepare FORCE +rusttest-macros: $(src)/macros/lib.rs FORCE +$(call if_changed,rustc_test) +$(call if_changed,rustdoc_test) rusttest-kernel: private rustc_target_flags = --extern alloc \ --extern build_error --extern macros --extern bindings --extern uapi -rusttest-kernel: $(src)/kernel/lib.rs rusttest-prepare \ +rusttest-kernel: $(src)/kernel/lib.rs \ rusttestlib-build_error rusttestlib-macros rusttestlib-bindings \ rusttestlib-uapi FORCE +$(call if_changed,rustc_test) @@ -281,7 +227,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ - -fstrict-flex-arrays=% \ + -fstrict-flex-arrays=% -fmin-function-alignment=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. @@ -404,12 +350,12 @@ rust-analyzer: $(Q)$(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' --cfgs='alloc=$(alloc-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ - $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ + $(rustc_sysroot) $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ $(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json redirect-intrinsics = \ - __addsf3 __eqsf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __unordsf2 \ - __adddf3 __ledf2 __ltdf2 __muldf3 __unorddf2 \ + __addsf3 __eqsf2 __extendsfdf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __truncdfsf2 __unordsf2 \ + __adddf3 __eqdf2 __ledf2 __ltdf2 __muldf3 __unorddf2 \ __muloti4 __multi3 \ __udivmodti4 __udivti3 __umodti3 @@ -421,7 +367,7 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) endif $(obj)/core.o: private skip_clippy = 1 -$(obj)/core.o: private skip_flags = -Dunreachable_pub +$(obj)/core.o: private skip_flags = -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) $(obj)/core.o: private rustc_target_flags = $(core-cfgs) $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE @@ -435,7 +381,7 @@ $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE +$(call if_changed_dep,rustc_library) $(obj)/alloc.o: private skip_clippy = 1 -$(obj)/alloc.o: private skip_flags = -Dunreachable_pub +$(obj)/alloc.o: private skip_flags = -Wunreachable_pub $(obj)/alloc.o: private rustc_target_flags = $(alloc-cfgs) $(obj)/alloc.o: $(RUST_LIB_SRC)/alloc/src/lib.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_dep,rustc_library) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 53c996e4bedfe11af0ae0a09b19bbaa6d9bc5a8c..b940a5777330454815092567b2973715b1157ab2 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -30,4 +30,5 @@ const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL; const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT; const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT; const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO; +const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM; const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL; diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 40ddaee50d8bd841ea476919c754a0b1b7bcf080..93a1a3fc97bc9bc6c11c004e763cd60eac4b0c99 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -24,6 +24,7 @@ unsafe_op_in_unsafe_fn )] +#[allow(dead_code)] mod bindings_raw { // Use glob import here to expose all helpers. // Symbols defined within the module will take precedence to the glob import. diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index bba2922c6ef77fe4326d84bbe20afc80dd4b92d5..f14b8d7caf89964198313deace1bcb06fea82964 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -40,16 +40,19 @@ macro_rules! define_panicking_intrinsics( define_panicking_intrinsics!("`f32` should not be used", { __addsf3, __eqsf2, + __extendsfdf2, __gesf2, __lesf2, __ltsf2, __mulsf3, __nesf2, + __truncdfsf2, __unordsf2, }); define_panicking_intrinsics!("`f64` should not be used", { __adddf3, + __eqdf2, __ledf2, __ltdf2, __muldf3, diff --git a/rust/helpers.c b/rust/helpers.c index 87ed0a5b60990e55f19c4c23a86ccdf9ffe61efd..92d3c03ae1bd533870148e897fdea7956d74d27f 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -40,6 +42,20 @@ __noreturn void rust_helper_BUG(void) } EXPORT_SYMBOL_GPL(rust_helper_BUG); +unsigned long rust_helper_copy_from_user(void *to, const void __user *from, + unsigned long n) +{ + return copy_from_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_from_user); + +unsigned long rust_helper_copy_to_user(void __user *to, const void *from, + unsigned long n) +{ + return copy_to_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_to_user); + void rust_helper_mutex_lock(struct mutex *lock) { mutex_lock(lock); @@ -81,6 +97,24 @@ int rust_helper_signal_pending(struct task_struct *t) } EXPORT_SYMBOL_GPL(rust_helper_signal_pending); +struct page *rust_helper_alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages(gfp_mask, order); +} +EXPORT_SYMBOL_GPL(rust_helper_alloc_pages); + +void *rust_helper_kmap_local_page(struct page *page) +{ + return kmap_local_page(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kmap_local_page); + +void rust_helper_kunmap_local(const void *addr) +{ + kunmap_local(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_kunmap_local); + refcount_t rust_helper_REFCOUNT_INIT(int n) { return (refcount_t)REFCOUNT_INIT(n); diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs index 531b5e471cb1404e16c5139fb4b2854619f982fb..1966bd40701741a872aeaac08245547d3d6cbf98 100644 --- a/rust/kernel/alloc.rs +++ b/rust/kernel/alloc.rs @@ -20,6 +20,13 @@ pub struct AllocError; #[derive(Clone, Copy)] pub struct Flags(u32); +impl Flags { + /// Get the raw representation of this flag. + pub(crate) fn as_raw(self) -> u32 { + self.0 + } +} + impl core::ops::BitOr for Flags { type Output = Self; fn bitor(self, rhs: Self) -> Self::Output { @@ -52,6 +59,14 @@ pub mod flags { /// This is normally or'd with other flags. pub const __GFP_ZERO: Flags = Flags(bindings::__GFP_ZERO); + /// Allow the allocation to be in high memory. + /// + /// Allocations in high memory may not be mapped into the kernel's address space, so this can't + /// be used with `kmalloc` and other similar methods. + /// + /// This is normally or'd with other flags. + pub const __GFP_HIGHMEM: Flags = Flags(bindings::__GFP_HIGHMEM); + /// Users can not sleep and need the allocation to succeed. /// /// A lower watermark is applied to allow access to "atomic reserves". The current @@ -66,7 +81,7 @@ pub mod flags { /// The same as [`GFP_KERNEL`], except the allocation is accounted to kmemcg. pub const GFP_KERNEL_ACCOUNT: Flags = Flags(bindings::GFP_KERNEL_ACCOUNT); - /// Ror kernel allocations that should not stall for direct reclaim, start physical IO or + /// For kernel allocations that should not stall for direct reclaim, start physical IO or /// use any filesystem callback. It is very likely to fail to allocate memory, even for very /// small allocations. pub const GFP_NOWAIT: Flags = Flags(bindings::GFP_NOWAIT); diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2ba03af9f0369d74e51b97c38100e8e1d7e0fd66..dee5b4b18aec403f38746a0fbd595929b7fc3ad4 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -2,7 +2,7 @@ //! Firmware abstraction //! -//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h") +//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; use core::ptr::NonNull; diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 68605b633e7384f4aabc68483627cfec5ccf8770..495c09ebe3a38f9eb5b2ccb684f916c713f1823e 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -843,11 +843,8 @@ where let val = unsafe { &mut *slot }; // SAFETY: `slot` is considered pinned. let val = unsafe { Pin::new_unchecked(val) }; - (self.1)(val).map_err(|e| { - // SAFETY: `slot` was initialized above. - unsafe { core::ptr::drop_in_place(slot) }; - e - }) + // SAFETY: `slot` was initialized above. + (self.1)(val).inspect_err(|_| unsafe { core::ptr::drop_in_place(slot) }) } } @@ -941,11 +938,9 @@ where // SAFETY: All requirements fulfilled since this function is `__init`. unsafe { self.0.__pinned_init(slot)? }; // SAFETY: The above call initialized `slot` and we still have unique access. - (self.1)(unsafe { &mut *slot }).map_err(|e| { + (self.1)(unsafe { &mut *slot }).inspect_err(|_| // SAFETY: `slot` was initialized above. - unsafe { core::ptr::drop_in_place(slot) }; - e - }) + unsafe { core::ptr::drop_in_place(slot) }) } } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index e6b7d3a80bbce6f16192022b77bc45f863a0d7fa..274bdc1b0a824a6cde07d2149928acf253d4e940 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -40,6 +40,7 @@ pub mod ioctl; pub mod kunit; #[cfg(CONFIG_NET)] pub mod net; +pub mod page; pub mod prelude; pub mod print; mod static_assert; @@ -50,6 +51,7 @@ pub mod sync; pub mod task; pub mod time; pub mod types; +pub mod uaccess; pub mod workqueue; #[doc(hidden)] diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs new file mode 100644 index 0000000000000000000000000000000000000000..208a006d587c1516e4e7a40540d13607583aa23d --- /dev/null +++ b/rust/kernel/page.rs @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel page allocation and management. + +use crate::{ + alloc::{AllocError, Flags}, + bindings, + error::code::*, + error::Result, + uaccess::UserSliceReader, +}; +use core::ptr::{self, NonNull}; + +/// A bitwise shift for the page size. +pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize; + +/// The number of bytes in a page. +pub const PAGE_SIZE: usize = bindings::PAGE_SIZE; + +/// A bitmask that gives the page containing a given address. +pub const PAGE_MASK: usize = !(PAGE_SIZE - 1); + +/// A pointer to a page that owns the page allocation. +/// +/// # Invariants +/// +/// The pointer is valid, and has ownership over the page. +pub struct Page { + page: NonNull, +} + +// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across +// threads is safe. +unsafe impl Send for Page {} + +// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing +// them concurrently is safe. +unsafe impl Sync for Page {} + +impl Page { + /// Allocates a new page. + /// + /// # Examples + /// + /// Allocate memory for a page. + /// + /// ``` + /// use kernel::page::Page; + /// + /// # fn dox() -> Result<(), kernel::alloc::AllocError> { + /// let page = Page::alloc_page(GFP_KERNEL)?; + /// # Ok(()) } + /// ``` + /// + /// Allocate memory for a page and zero its contents. + /// + /// ``` + /// use kernel::page::Page; + /// + /// # fn dox() -> Result<(), kernel::alloc::AllocError> { + /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?; + /// # Ok(()) } + /// ``` + pub fn alloc_page(flags: Flags) -> Result { + // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it + // is always safe to call this method. + let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) }; + let page = NonNull::new(page).ok_or(AllocError)?; + // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly + // allocated page. We transfer that ownership to the new `Page` object. + Ok(Self { page }) + } + + /// Returns a raw pointer to the page. + pub fn as_ptr(&self) -> *mut bindings::page { + self.page.as_ptr() + } + + /// Runs a piece of code with this page mapped to an address. + /// + /// The page is unmapped when this call returns. + /// + /// # Using the raw pointer + /// + /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for + /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might + /// only be mapped on the current thread, and when that is the case, dereferencing it on other + /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't + /// cause data races, the memory may be uninitialized, and so on. + /// + /// If multiple threads map the same page at the same time, then they may reference with + /// different addresses. However, even if the addresses are different, the underlying memory is + /// still the same for these purposes (e.g., it's still a data race if they both write to the + /// same underlying byte at the same time). + fn with_page_mapped(&self, f: impl FnOnce(*mut u8) -> T) -> T { + // SAFETY: `page` is valid due to the type invariants on `Page`. + let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) }; + + let res = f(mapped_addr.cast()); + + // This unmaps the page mapped above. + // + // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner + // where the pages are unmapped in reverse order. This is as required by `kunmap_local`. + // + // In other words, if this call to `kunmap_local` happens when a different page should be + // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the + // call just above in `with_page_mapped` that made that possible. In this case, it is the + // unsafe block that wraps that other call that is incorrect. + unsafe { bindings::kunmap_local(mapped_addr) }; + + res + } + + /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking. + /// + /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the + /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on + /// this task, as this method uses a local mapping. + /// + /// If `off` and `len` refers to a region outside of this page, then this method returns + /// [`EINVAL`] and does not call `f`. + /// + /// # Using the raw pointer + /// + /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for + /// `len` bytes and for the duration in which the closure is called. The pointer might only be + /// mapped on the current thread, and when that is the case, dereferencing it on other threads + /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause + /// data races, the memory may be uninitialized, and so on. + /// + /// If multiple threads map the same page at the same time, then they may reference with + /// different addresses. However, even if the addresses are different, the underlying memory is + /// still the same for these purposes (e.g., it's still a data race if they both write to the + /// same underlying byte at the same time). + fn with_pointer_into_page( + &self, + off: usize, + len: usize, + f: impl FnOnce(*mut u8) -> Result, + ) -> Result { + let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE; + + if bounds_ok { + self.with_page_mapped(move |page_addr| { + // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will + // result in a pointer that is in bounds or one off the end of the page. + f(unsafe { page_addr.add(off) }) + }) + } else { + Err(EINVAL) + } + } + + /// Maps the page and reads from it into the given buffer. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// * Callers must ensure that `dst` is valid for writing `len` bytes. + /// * Callers must ensure that this call does not race with a write to the same page that + /// overlaps with this read. + pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |src| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then + // it has performed a bounds check and guarantees that `src` is + // valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::copy_nonoverlapping(src, dst, len) }; + Ok(()) + }) + } + + /// Maps the page and writes into it from the given buffer. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// * Callers must ensure that `src` is valid for reading `len` bytes. + /// * Callers must ensure that this call does not race with a read or write to the same page + /// that overlaps with this write. + pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::copy_nonoverlapping(src, dst, len) }; + Ok(()) + }) + } + + /// Maps the page and zeroes the given slice. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// Callers must ensure that this call does not race with a read or write to the same page that + /// overlaps with this write. + pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::write_bytes(dst, 0u8, len) }; + Ok(()) + }) + } + + /// Copies data from userspace into this page. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address. + /// However, they are not allowed on the page you are copying into. + /// + /// # Safety + /// + /// Callers must ensure that this call does not race with a read or write to the same page that + /// overlaps with this write. + pub unsafe fn copy_from_user_slice_raw( + &self, + reader: &mut UserSliceReader, + offset: usize, + len: usize, + ) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have + // exclusive access to the slice since the caller guarantees that there are no races. + reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) }) + }) + } +} + +impl Drop for Page { + fn drop(&mut self) { + // SAFETY: By the type invariants, we have ownership of the page and can free it. + unsafe { bindings::__free_pages(self.page.as_ptr(), 0) }; + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 2e7c9008621f5dce10437ef577ebb2ea3d9d2d7f..bd189d646adb9a111cdb7ffe2dd6282e2069dabe 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -409,3 +409,67 @@ pub enum Either { /// Constructs an instance of [`Either`] containing a value of type `R`. Right(R), } + +/// Types for which any bit pattern is valid. +/// +/// Not all types are valid for all values. For example, a `bool` must be either zero or one, so +/// reading arbitrary bytes into something that contains a `bool` is not okay. +/// +/// It's okay for the type to have padding, as initializing those bytes has no effect. +/// +/// # Safety +/// +/// All bit-patterns must be valid for this type. This type must not have interior mutability. +pub unsafe trait FromBytes {} + +// SAFETY: All bit patterns are acceptable values of the types below. +unsafe impl FromBytes for u8 {} +unsafe impl FromBytes for u16 {} +unsafe impl FromBytes for u32 {} +unsafe impl FromBytes for u64 {} +unsafe impl FromBytes for usize {} +unsafe impl FromBytes for i8 {} +unsafe impl FromBytes for i16 {} +unsafe impl FromBytes for i32 {} +unsafe impl FromBytes for i64 {} +unsafe impl FromBytes for isize {} +// SAFETY: If all bit patterns are acceptable for individual values in an array, then all bit +// patterns are also acceptable for arrays of that type. +unsafe impl FromBytes for [T] {} +unsafe impl FromBytes for [T; N] {} + +/// Types that can be viewed as an immutable slice of initialized bytes. +/// +/// If a struct implements this trait, then it is okay to copy it byte-for-byte to userspace. This +/// means that it should not have any padding, as padding bytes are uninitialized. Reading +/// uninitialized memory is not just undefined behavior, it may even lead to leaking sensitive +/// information on the stack to userspace. +/// +/// The struct should also not hold kernel pointers, as kernel pointer addresses are also considered +/// sensitive. However, leaking kernel pointers is not considered undefined behavior by Rust, so +/// this is a correctness requirement, but not a safety requirement. +/// +/// # Safety +/// +/// Values of this type may not contain any uninitialized bytes. This type must not have interior +/// mutability. +pub unsafe trait AsBytes {} + +// SAFETY: Instances of the following types have no uninitialized portions. +unsafe impl AsBytes for u8 {} +unsafe impl AsBytes for u16 {} +unsafe impl AsBytes for u32 {} +unsafe impl AsBytes for u64 {} +unsafe impl AsBytes for usize {} +unsafe impl AsBytes for i8 {} +unsafe impl AsBytes for i16 {} +unsafe impl AsBytes for i32 {} +unsafe impl AsBytes for i64 {} +unsafe impl AsBytes for isize {} +unsafe impl AsBytes for bool {} +unsafe impl AsBytes for char {} +unsafe impl AsBytes for str {} +// SAFETY: If individual values in an array have no uninitialized portions, then the array itself +// does not have any uninitialized portions either. +unsafe impl AsBytes for [T] {} +unsafe impl AsBytes for [T; N] {} diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs new file mode 100644 index 0000000000000000000000000000000000000000..e9347cff99ab202e3d19a612ddeb2edd970022b7 --- /dev/null +++ b/rust/kernel/uaccess.rs @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Slices to user space memory regions. +//! +//! C header: [`include/linux/uaccess.h`](srctree/include/linux/uaccess.h) + +use crate::{ + alloc::Flags, + bindings, + error::Result, + prelude::*, + types::{AsBytes, FromBytes}, +}; +use alloc::vec::Vec; +use core::ffi::{c_ulong, c_void}; +use core::mem::{size_of, MaybeUninit}; + +/// The type used for userspace addresses. +pub type UserPtr = usize; + +/// A pointer to an area in userspace memory, which can be either read-only or read-write. +/// +/// All methods on this struct are safe: attempting to read or write on bad addresses (either out of +/// the bound of the slice or unmapped addresses) will return [`EFAULT`]. Concurrent access, +/// *including data races to/from userspace memory*, is permitted, because fundamentally another +/// userspace thread/process could always be modifying memory at the same time (in the same way that +/// userspace Rust's [`std::io`] permits data races with the contents of files on disk). In the +/// presence of a race, the exact byte values read/written are unspecified but the operation is +/// well-defined. Kernelspace code should validate its copy of data after completing a read, and not +/// expect that multiple reads of the same address will return the same value. +/// +/// These APIs are designed to make it difficult to accidentally write TOCTOU (time-of-check to +/// time-of-use) bugs. Every time a memory location is read, the reader's position is advanced by +/// the read length and the next read will start from there. This helps prevent accidentally reading +/// the same location twice and causing a TOCTOU bug. +/// +/// Creating a [`UserSliceReader`] and/or [`UserSliceWriter`] consumes the `UserSlice`, helping +/// ensure that there aren't multiple readers or writers to the same location. +/// +/// If double-fetching a memory location is necessary for some reason, then that is done by creating +/// multiple readers to the same memory location, e.g. using [`clone_reader`]. +/// +/// # Examples +/// +/// Takes a region of userspace memory from the current process, and modify it by adding one to +/// every byte in the region. +/// +/// ```no_run +/// use alloc::vec::Vec; +/// use core::ffi::c_void; +/// use kernel::error::Result; +/// use kernel::uaccess::{UserPtr, UserSlice}; +/// +/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result<()> { +/// let (read, mut write) = UserSlice::new(uptr, len).reader_writer(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// for b in &mut buf { +/// *b = b.wrapping_add(1); +/// } +/// +/// write.write_slice(&buf)?; +/// Ok(()) +/// } +/// ``` +/// +/// Example illustrating a TOCTOU (time-of-check to time-of-use) bug. +/// +/// ```no_run +/// use alloc::vec::Vec; +/// use core::ffi::c_void; +/// use kernel::error::{code::EINVAL, Result}; +/// use kernel::uaccess::{UserPtr, UserSlice}; +/// +/// /// Returns whether the data in this region is valid. +/// fn is_valid(uptr: UserPtr, len: usize) -> Result { +/// let read = UserSlice::new(uptr, len).reader(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// todo!() +/// } +/// +/// /// Returns the bytes behind this user pointer if they are valid. +/// fn get_bytes_if_valid(uptr: UserPtr, len: usize) -> Result> { +/// if !is_valid(uptr, len)? { +/// return Err(EINVAL); +/// } +/// +/// let read = UserSlice::new(uptr, len).reader(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// // THIS IS A BUG! The bytes could have changed since we checked them. +/// // +/// // To avoid this kind of bug, don't call `UserSlice::new` multiple +/// // times with the same address. +/// Ok(buf) +/// } +/// ``` +/// +/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html +/// [`clone_reader`]: UserSliceReader::clone_reader +pub struct UserSlice { + ptr: UserPtr, + length: usize, +} + +impl UserSlice { + /// Constructs a user slice from a raw pointer and a length in bytes. + /// + /// Constructing a [`UserSlice`] performs no checks on the provided address and length, it can + /// safely be constructed inside a kernel thread with no current userspace process. Reads and + /// writes wrap the kernel APIs `copy_from_user` and `copy_to_user`, which check the memory map + /// of the current process and enforce that the address range is within the user range (no + /// additional calls to `access_ok` are needed). Validity of the pointer is checked when you + /// attempt to read or write, not in the call to `UserSlice::new`. + /// + /// Callers must be careful to avoid time-of-check-time-of-use (TOCTOU) issues. The simplest way + /// is to create a single instance of [`UserSlice`] per user memory block as it reads each byte + /// at most once. + pub fn new(ptr: UserPtr, length: usize) -> Self { + UserSlice { ptr, length } + } + + /// Reads the entirety of the user slice, appending it to the end of the provided buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address. + pub fn read_all(self, buf: &mut Vec, flags: Flags) -> Result { + self.reader().read_all(buf, flags) + } + + /// Constructs a [`UserSliceReader`]. + pub fn reader(self) -> UserSliceReader { + UserSliceReader { + ptr: self.ptr, + length: self.length, + } + } + + /// Constructs a [`UserSliceWriter`]. + pub fn writer(self) -> UserSliceWriter { + UserSliceWriter { + ptr: self.ptr, + length: self.length, + } + } + + /// Constructs both a [`UserSliceReader`] and a [`UserSliceWriter`]. + /// + /// Usually when this is used, you will first read the data, and then overwrite it afterwards. + pub fn reader_writer(self) -> (UserSliceReader, UserSliceWriter) { + ( + UserSliceReader { + ptr: self.ptr, + length: self.length, + }, + UserSliceWriter { + ptr: self.ptr, + length: self.length, + }, + ) + } +} + +/// A reader for [`UserSlice`]. +/// +/// Used to incrementally read from the user slice. +pub struct UserSliceReader { + ptr: UserPtr, + length: usize, +} + +impl UserSliceReader { + /// Skip the provided number of bytes. + /// + /// Returns an error if skipping more than the length of the buffer. + pub fn skip(&mut self, num_skip: usize) -> Result { + // Update `self.length` first since that's the fallible part of this operation. + self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?; + self.ptr = self.ptr.wrapping_add(num_skip); + Ok(()) + } + + /// Create a reader that can access the same range of data. + /// + /// Reading from the clone does not advance the current reader. + /// + /// The caller should take care to not introduce TOCTOU issues, as described in the + /// documentation for [`UserSlice`]. + pub fn clone_reader(&self) -> UserSliceReader { + UserSliceReader { + ptr: self.ptr, + length: self.length, + } + } + + /// Returns the number of bytes left to be read from this reader. + /// + /// Note that even reading less than this number of bytes may fail. + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if no data is available in the io buffer. + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Reads raw data from the user slice into a kernel buffer. + /// + /// For a version that uses `&mut [u8]`, please see [`UserSliceReader::read_slice`]. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error. + /// + /// # Guarantees + /// + /// After a successful call to this method, all bytes in `out` are initialized. + pub fn read_raw(&mut self, out: &mut [MaybeUninit]) -> Result { + let len = out.len(); + let out_ptr = out.as_mut_ptr().cast::(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: `out_ptr` points into a mutable slice of length `len_ulong`, so we may write + // that many bytes to it. + let res = + unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len_ulong) }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } + + /// Reads raw data from the user slice into a kernel buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error. + pub fn read_slice(&mut self, out: &mut [u8]) -> Result { + // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to + // `out`. + let out = unsafe { &mut *(out as *mut [u8] as *mut [MaybeUninit]) }; + self.read_raw(out) + } + + /// Reads a value of the specified type. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. + pub fn read(&mut self) -> Result { + let len = size_of::(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + let mut out: MaybeUninit = MaybeUninit::uninit(); + // SAFETY: The local variable `out` is valid for writing `size_of::()` bytes. + // + // By using the _copy_from_user variant, we skip the check_object_size check that verifies + // the kernel pointer. This mirrors the logic on the C side that skips the check when the + // length is a compile-time constant. + let res = unsafe { + bindings::_copy_from_user( + out.as_mut_ptr().cast::(), + self.ptr as *const c_void, + len_ulong, + ) + }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements + // `FromBytes`, any bit-pattern is a valid value for this type. + Ok(unsafe { out.assume_init() }) + } + + /// Reads the entirety of the user slice, appending it to the end of the provided buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address. + pub fn read_all(mut self, buf: &mut Vec, flags: Flags) -> Result { + let len = self.length; + VecExt::::reserve(buf, len, flags)?; + + // The call to `try_reserve` was successful, so the spare capacity is at least `len` bytes + // long. + self.read_raw(&mut buf.spare_capacity_mut()[..len])?; + + // SAFETY: Since the call to `read_raw` was successful, so the next `len` bytes of the + // vector have been initialized. + unsafe { buf.set_len(buf.len() + len) }; + Ok(()) + } +} + +/// A writer for [`UserSlice`]. +/// +/// Used to incrementally write into the user slice. +pub struct UserSliceWriter { + ptr: UserPtr, + length: usize, +} + +impl UserSliceWriter { + /// Returns the amount of space remaining in this buffer. + /// + /// Note that even writing less than this number of bytes may fail. + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if no more data can be written to this buffer. + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Writes raw data to this user pointer from a kernel buffer. + /// + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of + /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even + /// if it returns an error. + pub fn write_slice(&mut self, data: &[u8]) -> Result { + let len = data.len(); + let data_ptr = data.as_ptr().cast::(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: `data_ptr` points into an immutable slice of length `len_ulong`, so we may read + // that many bytes from it. + let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len_ulong) }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } + + /// Writes the provided Rust value to this userspace pointer. + /// + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of + /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even + /// if it returns an error. + pub fn write(&mut self, value: &T) -> Result { + let len = size_of::(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: The reference points to a value of type `T`, so it is valid for reading + // `size_of::()` bytes. + // + // By using the _copy_to_user variant, we skip the check_object_size check that verifies the + // kernel pointer. This mirrors the logic on the C side that skips the check when the length + // is a compile-time constant. + let res = unsafe { + bindings::_copy_to_user( + self.ptr as *mut c_void, + (value as *const T).cast::(), + len_ulong, + ) + }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } +} diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 1cec63a2aea8b0529216b723d9deeaece05a3830..553a5cba2adcb58cab56751c32398a0408e87bcd 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -482,24 +482,26 @@ pub unsafe trait HasWork { /// use kernel::sync::Arc; /// use kernel::workqueue::{self, impl_has_work, Work}; /// -/// struct MyStruct { -/// work_field: Work, +/// struct MyStruct<'a, T, const N: usize> { +/// work_field: Work, 17>, +/// f: fn(&'a [T; N]), /// } /// /// impl_has_work! { -/// impl HasWork for MyStruct { self.work_field } +/// impl{'a, T, const N: usize} HasWork, 17> +/// for MyStruct<'a, T, N> { self.work_field } /// } /// ``` #[macro_export] macro_rules! impl_has_work { - ($(impl$(<$($implarg:ident),*>)? + ($(impl$({$($generics:tt)*})? HasWork<$work_type:ty $(, $id:tt)?> - for $self:ident $(<$($selfarg:ident),*>)? + for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right // type. - unsafe impl$(<$($implarg),*>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self $(<$($selfarg),*>)? { + unsafe impl$(<$($generics)+>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] @@ -515,7 +517,7 @@ macro_rules! impl_has_work { pub use impl_has_work; impl_has_work! { - impl HasWork for ClosureWork { self.work } + impl{T} HasWork for ClosureWork { self.work } } unsafe impl WorkItemPointer for Arc diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 520eae5fd792810069d8018d922f6c56c0bcc101..5be0cb9db3ee490c1b01878f58b89ab758fcacac 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -35,6 +35,7 @@ use proc_macro::TokenStream; /// author: "Rust for Linux Contributors", /// description: "My very own kernel module!", /// license: "GPL", +/// alias: ["alternate_module_name"], /// } /// /// struct MyModule; @@ -55,13 +56,45 @@ use proc_macro::TokenStream; /// } /// ``` /// +/// ## Firmware +/// +/// The following example shows how to declare a kernel module that needs +/// to load binary firmware files. You need to specify the file names of +/// the firmware in the `firmware` field. The information is embedded +/// in the `modinfo` section of the kernel module. For example, a tool to +/// build an initramfs uses this information to put the firmware files into +/// the initramfs image. +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module!{ +/// type: MyDeviceDriverModule, +/// name: "my_device_driver_module", +/// author: "Rust for Linux Contributors", +/// description: "My device driver requires firmware", +/// license: "GPL", +/// firmware: ["my_device_firmware1.bin", "my_device_firmware2.bin"], +/// } +/// +/// struct MyDeviceDriverModule; +/// +/// impl kernel::Module for MyDeviceDriverModule { +/// fn init() -> Result { +/// Ok(Self) +/// } +/// } +/// ``` +/// /// # Supported argument types /// - `type`: type which implements the [`Module`] trait (required). -/// - `name`: byte array of the name of the kernel module (required). -/// - `author`: byte array of the author of the kernel module. -/// - `description`: byte array of the description of the kernel module. -/// - `license`: byte array of the license of the kernel module (required). -/// - `alias`: byte array of alias name of the kernel module. +/// - `name`: ASCII string literal of the name of the kernel module (required). +/// - `author`: string literal of the author of the kernel module. +/// - `description`: string literal of the description of the kernel module. +/// - `license`: ASCII string literal of the license of the kernel module (required). +/// - `alias`: array of ASCII string literals of the alias names of the kernel module. +/// - `firmware`: array of ASCII string literals of the firmware files of +/// the kernel module. #[proc_macro] pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) @@ -312,7 +345,7 @@ pub fn pinned_drop(args: TokenStream, input: TokenStream) -> TokenStream { /// /// Currently supported modifiers are: /// * `span`: change the span of concatenated identifier to the span of the specified token. By -/// default the span of the `[< >]` group is used. +/// default the span of the `[< >]` group is used. /// * `lower`: change the identifier to lower case. /// * `upper`: change the identifier to upper case. /// diff --git a/rust/macros/module.rs b/rust/macros/module.rs index acd0393b509574b0989bedcf1ed244532ab63a29..411dc103d82ecc02c3da4dfed981d3a7f2290107 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -97,14 +97,22 @@ struct ModuleInfo { author: Option, description: Option, alias: Option>, + firmware: Option>, } impl ModuleInfo { fn parse(it: &mut token_stream::IntoIter) -> Self { let mut info = ModuleInfo::default(); - const EXPECTED_KEYS: &[&str] = - &["type", "name", "author", "description", "license", "alias"]; + const EXPECTED_KEYS: &[&str] = &[ + "type", + "name", + "author", + "description", + "license", + "alias", + "firmware", + ]; const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; let mut seen_keys = Vec::new(); @@ -131,6 +139,7 @@ impl ModuleInfo { "description" => info.description = Some(expect_string(it)), "license" => info.license = expect_string_ascii(it), "alias" => info.alias = Some(expect_string_array(it)), + "firmware" => info.firmware = Some(expect_string_array(it)), _ => panic!( "Unknown key \"{}\". Valid keys are: {:?}.", key, EXPECTED_KEYS @@ -186,6 +195,11 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { modinfo.emit("alias", &alias); } } + if let Some(firmware) = info.firmware { + for fw in firmware { + modinfo.emit("firmware", &fw); + } + } // Built-in modules also export the `file` modinfo string. let file = diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 0caad902ba40a2a7132ff3702fb98f76fb1a1b2f..80a00260e3e7a10cb9a9ed9d98f9d47606255ca2 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + dead_code, missing_docs, non_camel_case_types, non_upper_case_globals, diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 746ff2d272f25667d31dbe2299ce00e2f9ea000a..5694df3da2e95b0a606f7174acdd89adc9b1a9e0 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -19,10 +19,6 @@ #include "gcc-common.h" #include "randomize_layout_seed.h" -#if BUILDING_GCC_MAJOR < 4 || (BUILDING_GCC_MAJOR == 4 && BUILDING_GCC_MINOR < 7) -#error "The RANDSTRUCT plugin requires GCC 4.7 or newer." -#endif - #define ORIG_TYPE_NAME(node) \ (TYPE_NAME(TYPE_MAIN_VARIANT(node)) != NULL_TREE ? ((const unsigned char *)IDENTIFIER_POINTER(TYPE_NAME(TYPE_MAIN_VARIANT(node)))) : (const unsigned char *)"anonymous") diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh index 825c75c5b715051405905c599ff0e7b9f19a9106..9459ca4f0f11fb28d807d1e0bacee3f63bb27062 100755 --- a/scripts/gcc-x86_32-has-stack-protector.sh +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -5,4 +5,4 @@ # -mstack-protector-guard-reg, added by # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708 -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs" diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 75e4e22b986adcfd07197777c5d59d5601d3c920..f680bb01aeeb30fcf3fe44289f2db4b4c8158d63 100755 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,4 +1,4 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f270c7b0cf345d948db8d03f488dc3de45a7e599..d2bc63cde8c6a39e74c95abc7329190fe460207d 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -145,6 +145,7 @@ def main(): parser.add_argument('--cfgs', action='append', default=[]) parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot", type=pathlib.Path) parser.add_argument("sysroot_src", type=pathlib.Path) parser.add_argument("exttree", type=pathlib.Path, nargs="?") args = parser.parse_args() @@ -154,9 +155,12 @@ def main(): level=logging.INFO if args.verbose else logging.WARNING ) + # Making sure that the `sysroot` and `sysroot_src` belong to the same toolchain. + assert args.sysroot in args.sysroot_src.parents + rust_project = { "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), - "sysroot_src": str(args.sysroot_src), + "sysroot": str(args.sysroot), } json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 87f34925eb7b71b7858124627fb2fee596e17aa5..404edf7587e088ca057ca943ff74e397e77d4a6e 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -162,7 +162,7 @@ fn main() { "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } @@ -179,7 +179,7 @@ fn main() { "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0ed873491bf555e64da7b92662459d28ad8e7382..123dab0572f80045d06c053a5d905bc15660b857 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,8 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] - * [--lto-clang] in.map > out.S + * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -62,7 +61,6 @@ static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; static int absolute_percpu; -static int lto_clang; static int token_profit[0x10000]; @@ -73,8 +71,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " - "[--lto-clang] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); exit(1); } @@ -344,25 +341,6 @@ static bool symbol_absolute(const struct sym_entry *s) return s->percpu_absolute; } -static void cleanup_symbol_name(char *s) -{ - char *p; - - /* - * ASCII[.] = 2e - * ASCII[0-9] = 30,39 - * ASCII[A-Z] = 41,5a - * ASCII[_] = 5f - * ASCII[a-z] = 61,7a - * - * As above, replacing the first '.' in ".llvm." with '\0' does not - * affect the main sorting, but it helps us with subsorting. - */ - p = strstr(s, ".llvm."); - if (p) - *p = '\0'; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -526,10 +504,6 @@ static void write_src(void) output_address(relative_base); printf("\n"); - if (lto_clang) - for (i = 0; i < table_cnt; i++) - cleanup_symbol_name((char *)table[i]->sym); - sort_symbols_by_name(); output_label("kallsyms_seqs_of_names"); for (i = 0; i < table_cnt; i++) @@ -807,7 +781,6 @@ int main(int argc, char **argv) static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, {"absolute-percpu", no_argument, &absolute_percpu, 1}, - {"lto-clang", no_argument, <o_clang, 1}, {}, }; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba95610e210de1da71fa5f56d03600a..22d0bc8439863f2a11fc7ae144ecb3b52f447972 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -156,10 +156,6 @@ kallsyms() kallsymopt="${kallsymopt} --absolute-percpu" fi - if is_enabled CONFIG_LTO_CLANG; then - kallsymopt="${kallsymopt} --lto-clang" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 74355ff0e1063fcfc1f37eea90e3b8651923f966..ac3e5ac01d8a4daa031bc9e70b792a68f74c388b 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -74,7 +74,7 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA echo "/lib/modules/%{KERNELRELEASE}" for x in alias alias.bin builtin.alias.bin builtin.bin dep dep.bin \ - devname softdep symbols symbols.bin; do + devname softdep symbols symbols.bin weakdep; do echo "%ghost /lib/modules/%{KERNELRELEASE}/modules.${x}" done diff --git a/scripts/package/mkspec b/scripts/package/mkspec index ead54d67a024150fcdc96e178e9aa658994f4afd..4dc1466dfc815c110eb7206f83dd874b17f5170f 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -50,6 +50,6 @@ fi cat << EOF %changelog -* $(LC_ALL=C; date +'%a %b %d %Y') ${name} <${email}> +* $(LC_ALL=C date +'%a %b %d %Y') ${name} <${email}> - Custom built Linux kernel. EOF diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh index 117018946b577a791ef13e37ddeb5f25d1c1a9d7..5262c56dd674e6335b5dc79b8533ff908ed82e23 100755 --- a/scripts/rust_is_available.sh +++ b/scripts/rust_is_available.sh @@ -117,20 +117,16 @@ if [ "$rust_compiler_cversion" -lt "$rust_compiler_min_cversion" ]; then echo >&2 "***" exit 1 fi -if [ "$rust_compiler_cversion" -gt "$rust_compiler_min_cversion" ]; then - echo >&2 "***" - echo >&2 "*** Rust compiler '$RUSTC' is too new. This may or may not work." - echo >&2 "*** Your version: $rust_compiler_version" - echo >&2 "*** Expected version: $rust_compiler_min_version" - echo >&2 "***" - warning=1 -fi # Check that the Rust bindings generator is suitable. # # Non-stable and distributions' versions may have a version suffix, e.g. `-dev`. +# +# The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 +# (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when +# the minimum version is upgraded past that (0.69.1 already fixed the issue). rust_bindings_generator_output=$( \ - LC_ALL=C "$BINDGEN" --version 2>/dev/null + LC_ALL=C "$BINDGEN" --version workaround-for-0.69.0 2>/dev/null ) || rust_bindings_generator_code=$? if [ -n "$rust_bindings_generator_code" ]; then echo >&2 "***" @@ -165,13 +161,18 @@ if [ "$rust_bindings_generator_cversion" -lt "$rust_bindings_generator_min_cvers echo >&2 "***" exit 1 fi -if [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cversion" ]; then - echo >&2 "***" - echo >&2 "*** Rust bindings generator '$BINDGEN' is too new. This may or may not work." - echo >&2 "*** Your version: $rust_bindings_generator_version" - echo >&2 "*** Expected version: $rust_bindings_generator_min_version" - echo >&2 "***" - warning=1 +if [ "$rust_bindings_generator_cversion" -eq 6600 ] || + [ "$rust_bindings_generator_cversion" -eq 6601 ]; then + # Distributions may have patched the issue (e.g. Debian did). + if ! "$BINDGEN" $(dirname $0)/rust_is_available_bindgen_0_66.h >/dev/null; then + echo >&2 "***" + echo >&2 "*** Rust bindings generator '$BINDGEN' versions 0.66.0 and 0.66.1 may not" + echo >&2 "*** work due to a bug (https://github.com/rust-lang/rust-bindgen/pull/2567)," + echo >&2 "*** unless patched (like Debian's)." + echo >&2 "*** Your version: $rust_bindings_generator_version" + echo >&2 "***" + warning=1 + fi fi # Check that the `libclang` used by the Rust bindings generator is suitable. diff --git a/scripts/rust_is_available_bindgen_0_66.h b/scripts/rust_is_available_bindgen_0_66.h new file mode 100644 index 0000000000000000000000000000000000000000..c0431293421c8244553e7c596ecca1da77ad1bd5 --- /dev/null +++ b/scripts/rust_is_available_bindgen_0_66.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define A "\0" diff --git a/scripts/rust_is_available_test.py b/scripts/rust_is_available_test.py index 57613fe5ed754585394edb86aef29f2f52b4bf93..413741037fb307a2b4399f76aa7ad0294a759e55 100755 --- a/scripts/rust_is_available_test.py +++ b/scripts/rust_is_available_test.py @@ -54,18 +54,34 @@ else: """) @classmethod - def generate_bindgen(cls, version_stdout, libclang_stderr): + def generate_bindgen(cls, version_stdout, libclang_stderr, version_0_66_patched=False): + if libclang_stderr is None: + libclang_case = f"raise SystemExit({cls.bindgen_default_bindgen_libclang_failure_exit_code})" + else: + libclang_case = f"print({repr(libclang_stderr)}, file=sys.stderr)" + + if version_0_66_patched: + version_0_66_case = "pass" + else: + version_0_66_case = "raise SystemExit(1)" + return cls.generate_executable(f"""#!/usr/bin/env python3 import sys if "rust_is_available_bindgen_libclang.h" in " ".join(sys.argv): - print({repr(libclang_stderr)}, file=sys.stderr) + {libclang_case} +elif "rust_is_available_bindgen_0_66.h" in " ".join(sys.argv): + {version_0_66_case} else: print({repr(version_stdout)}) """) @classmethod - def generate_bindgen_version(cls, stdout): - return cls.generate_bindgen(stdout, cls.bindgen_default_bindgen_libclang_stderr) + def generate_bindgen_version(cls, stdout, version_0_66_patched=False): + return cls.generate_bindgen(stdout, cls.bindgen_default_bindgen_libclang_stderr, version_0_66_patched) + + @classmethod + def generate_bindgen_libclang_failure(cls): + return cls.generate_bindgen(cls.bindgen_default_bindgen_version_stdout, None) @classmethod def generate_bindgen_libclang(cls, stderr): @@ -89,6 +105,7 @@ else: cls.rust_default_sysroot = subprocess.check_output(("rustc", "--print", "sysroot")).decode().strip() cls.bindgen_default_bindgen_version_stdout = f"bindgen {cls.bindgen_default_version}" + cls.bindgen_default_bindgen_libclang_failure_exit_code = 42 cls.bindgen_default_bindgen_libclang_stderr = f"scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {cls.llvm_default_version} [-W#pragma-messages], err: false" cls.default_rustc = cls.generate_rustc(f"rustc {cls.rustc_default_version}") @@ -193,11 +210,6 @@ else: result = self.run_script(self.Expected.FAILURE, { "RUSTC": rustc }) self.assertIn(f"Rust compiler '{rustc}' is too old.", result.stderr) - def test_rustc_new_version(self): - rustc = self.generate_rustc("rustc 1.999.0 (a8314ef7d 2099-06-27)") - result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "RUSTC": rustc }) - self.assertIn(f"Rust compiler '{rustc}' is too new. This may or may not work.", result.stderr) - def test_bindgen_nonexecutable(self): result = self.run_script(self.Expected.FAILURE, { "BINDGEN": self.nonexecutable }) self.assertIn(f"Running '{self.nonexecutable}' to check the Rust bindings generator version failed with", result.stderr) @@ -226,21 +238,24 @@ else: result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen }) self.assertIn(f"Rust bindings generator '{bindgen}' is too old.", result.stderr) - def test_bindgen_new_version(self): - bindgen = self.generate_bindgen_version("bindgen 0.999.0") - result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "BINDGEN": bindgen }) - self.assertIn(f"Rust bindings generator '{bindgen}' is too new. This may or may not work.", result.stderr) + def test_bindgen_bad_version_0_66_0_and_0_66_1(self): + for version in ("0.66.0", "0.66.1"): + with self.subTest(version=version): + bindgen = self.generate_bindgen_version(f"bindgen {version}") + result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "BINDGEN": bindgen }) + self.assertIn(f"Rust bindings generator '{bindgen}' versions 0.66.0 and 0.66.1 may not", result.stderr) + + def test_bindgen_bad_version_0_66_0_and_0_66_1_patched(self): + for version in ("0.66.0", "0.66.1"): + with self.subTest(version=version): + bindgen = self.generate_bindgen_version(f"bindgen {version}", True) + result = self.run_script(self.Expected.SUCCESS, { "BINDGEN": bindgen }) def test_bindgen_libclang_failure(self): - for env in ( - { "LLVM_CONFIG_PATH": self.missing }, - { "LIBCLANG_PATH": self.missing }, - { "CLANG_PATH": self.missing }, - ): - with self.subTest(env=env): - result = self.run_script(self.Expected.FAILURE, env | { "PATH": os.environ["PATH"], "BINDGEN": "bindgen" }) - self.assertIn("Running 'bindgen' to check the libclang version (used by the Rust", result.stderr) - self.assertIn("bindings generator) failed with code ", result.stderr) + bindgen = self.generate_bindgen_libclang_failure() + result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen }) + self.assertIn(f"Running '{bindgen}' to check the libclang version (used by the Rust", result.stderr) + self.assertIn(f"bindings generator) failed with code {self.bindgen_default_bindgen_libclang_failure_exit_code}. This may be caused by", result.stderr) def test_bindgen_libclang_unexpected_version(self): bindgen = self.generate_bindgen_libclang("scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version unexpected [-W#pragma-messages], err: false") diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index 591d85e8ca7e357ac80ab99eecfcba4cc2985423..845e24eb372e7f2aca193a4f424b85234b4fc9c2 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -53,6 +53,7 @@ 39 common umount2 sys_umount 40 common mount sys_mount 41 common pivot_root sys_pivot_root +42 common nfsservctl sys_ni_syscall 43 32 statfs64 sys_statfs64 compat_sys_statfs64 43 64 statfs sys_statfs 44 32 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 @@ -98,9 +99,9 @@ 77 common tee sys_tee 78 common readlinkat sys_readlinkat 79 stat64 fstatat64 sys_fstatat64 -79 newstat fstatat sys_newfstatat +79 64 newfstatat sys_newfstatat 80 stat64 fstat64 sys_fstat64 -80 newstat fstat sys_newfstat +80 64 fstat sys_newfstat 81 common sync sys_sync 82 common fsync sys_fsync 83 common fdatasync sys_fdatasync @@ -402,4 +403,3 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index bcfea073e3f2e386ada23becdd1e1be92f4878e1..01b923d97a4461ed7e01b09d330132eedc476ec8 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1692,6 +1692,10 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) struct aa_profile *p; p = aa_deref_parent(profile); dent = prof_dir(p); + if (!dent) { + error = -ENOENT; + goto fail2; + } /* adding to parent that previously didn't have children */ dent = aafs_create_dir("profiles", dent); if (IS_ERR(dent)) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index c03eb7c19f16229bc7fc89ac1dfd9fe6ae6f5fa9..d52a5b14dad4c75f24acdb93a07a148ee620d9f2 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -144,19 +144,6 @@ int aa_audit_file(const struct cred *subj_cred, return aa_audit(type, profile, &ad, file_audit_cb); } -/** - * is_deleted - test if a file has been completely unlinked - * @dentry: dentry of file to test for deletion (NOT NULL) - * - * Returns: true if deleted else false - */ -static inline bool is_deleted(struct dentry *dentry) -{ - if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0) - return true; - return false; -} - static int path_name(const char *op, const struct cred *subj_cred, struct aa_label *label, const struct path *path, int flags, char *buffer, diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 58fdc72af66455b78f4fd6df5a3c9a5ffcd1e4d8..7265d2f81dd50da010fab34cf606d4605f90f4a4 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -63,6 +63,26 @@ static inline struct aa_label *aa_get_newest_cred_label(const struct cred *cred) return aa_get_newest_label(aa_cred_raw_label(cred)); } +static inline struct aa_label *aa_get_newest_cred_label_condref(const struct cred *cred, + bool *needput) +{ + struct aa_label *l = aa_cred_raw_label(cred); + + if (unlikely(label_is_stale(l))) { + *needput = true; + return aa_get_newest_label(l); + } + + *needput = false; + return l; +} + +static inline void aa_put_label_condref(struct aa_label *l, bool needput) +{ + if (unlikely(needput)) + aa_put_label(l); +} + /** * aa_current_raw_label - find the current tasks confining label * diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 16568b6d589d3111e00f78f9e245db10267957bc..808060f9effb73f14e4cb894db98c4e887501597 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -461,6 +461,7 @@ static int apparmor_file_open(struct file *file) struct aa_file_ctx *fctx = file_ctx(file); struct aa_label *label; int error = 0; + bool needput; if (!path_mediated_fs(file->f_path.dentry)) return 0; @@ -477,7 +478,7 @@ static int apparmor_file_open(struct file *file) return 0; } - label = aa_get_newest_cred_label(file->f_cred); + label = aa_get_newest_cred_label_condref(file->f_cred, &needput); if (!unconfined(label)) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); @@ -494,7 +495,7 @@ static int apparmor_file_open(struct file *file) /* todo cache full allowed permissions set and state */ fctx->allow = aa_map_file_to_perms(file); } - aa_put_label(label); + aa_put_label_condref(label, needput); return error; } @@ -1124,7 +1125,7 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) * @sock: socket that is being setup * @family: family of socket being created * @type: type of the socket - * @ptotocol: protocol of the socket + * @protocol: protocol of the socket * @kern: socket is a special kernel socket * * Note: @@ -1304,6 +1305,13 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!skb->secmark) return 0; + /* + * If reach here before socket_post_create hook is called, in which + * case label is null, drop the packet. + */ + if (!ctx->label) + return -EACCES; + return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE, skb->secmark, sk); } diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 49fe8da6fea4597399f1b2113f230353a8b8c674..bf8863253e07226731c059a407a06a041909c1db 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -44,6 +44,8 @@ static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags) audit_log_format(ab, ", mand"); if (flags & MS_DIRSYNC) audit_log_format(ab, ", dirsync"); + if (flags & MS_NOSYMFOLLOW) + audit_log_format(ab, ", nosymfollow"); if (flags & MS_NOATIME) audit_log_format(ab, ", noatime"); if (flags & MS_NODIRATIME) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 957654d253dd74cb50ac2a666a1588a03ac2e821..14df15e3569525fb16fd16360b6cfe2a78688c55 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -225,7 +225,7 @@ static void aa_free_data(void *ptr, void *arg) { struct aa_data *data = ptr; - kfree_sensitive(data->data); + kvfree_sensitive(data->data, data->size); kfree_sensitive(data->key); kfree_sensitive(data); } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5e578ef0ddffb1f7adb0cc9863bcbd18ec9a562b..5a570235427d87cb8cc7513abb77a7891a7ab246 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -747,34 +747,42 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, *info = "missing required dfa"; goto fail; } - goto out; + } else { + /* + * only unpack the following if a dfa is present + * + * sadly start was given different names for file and policydb + * but since it is optional we can try both + */ + if (!aa_unpack_u32(e, &pdb->start[0], "start")) + /* default start state */ + pdb->start[0] = DFA_START; + if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) { + /* default start state for xmatch and file dfa */ + pdb->start[AA_CLASS_FILE] = DFA_START; + } /* setup class index */ + for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { + pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0], + i); + } } /* - * only unpack the following if a dfa is present - * - * sadly start was given different names for file and policydb - * but since it is optional we can try both + * Unfortunately due to a bug in earlier userspaces, a + * transition table may be present even when the dfa is + * not. For compatibility reasons unpack and discard. */ - if (!aa_unpack_u32(e, &pdb->start[0], "start")) - /* default start state */ - pdb->start[0] = DFA_START; - if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) { - /* default start state for xmatch and file dfa */ - pdb->start[AA_CLASS_FILE] = DFA_START; - } /* setup class index */ - for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { - pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0], - i); - } if (!unpack_trans_table(e, &pdb->trans) && required_trans) { *info = "failed to unpack profile transition table"; goto fail; } + if (!pdb->dfa && pdb->trans.table) + aa_free_str_table(&pdb->trans); + /* TODO: move compat mapping here, requires dfa merging first */ /* TODO: move verify here, it has to be done after compat mappings */ -out: + *policy = pdb; return 0; @@ -1071,6 +1079,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (rhashtable_insert_fast(profile->data, &data->head, profile->data->p)) { + kvfree_sensitive(data->data, data->size); kfree_sensitive(data->key); kfree_sensitive(data); info = "failed to insert data to table"; diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index 5c9bde25e56df9c8e3459af8afaf886f718287a2..874fcf97794eed411ad8a5ed4af0c95c39a3b4a6 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -604,4 +604,5 @@ static struct kunit_suite apparmor_policy_unpack_test_module = { kunit_test_suite(apparmor_policy_unpack_test_module); +MODULE_DESCRIPTION("KUnit tests for AppArmor's policy unpack"); MODULE_LICENSE("GPL"); diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index b5f81a05be3676457fee411b753ac64588eaf4de..4edc5bbbcda3c99553896d6cc6c12226697fa727 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -186,20 +186,21 @@ out: return ret; } -static int decrypt_blob_key(u8 *key) +static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key) { - return do_dcp_crypto(key, key, false); + return do_dcp_crypto(encrypted_key, plain_key, false); } -static int encrypt_blob_key(u8 *key) +static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key) { - return do_dcp_crypto(key, key, true); + return do_dcp_crypto(plain_key, encrypted_key, true); } static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; blen = calc_blob_len(p->key_len); if (blen > MAX_BLOB_SIZE) @@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) b->fmt_version = DCP_BLOB_VERSION; get_random_bytes(b->nonce, AES_KEYSIZE_128); - get_random_bytes(b->blob_key, AES_KEYSIZE_128); + get_random_bytes(plain_blob_key, AES_KEYSIZE_128); - ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key, + ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key, b->nonce, true); if (ret) { pr_err("Unable to encrypt blob payload: %i\n", ret); - return ret; + goto out; } - ret = encrypt_blob_key(b->blob_key); + ret = encrypt_blob_key(plain_blob_key, b->blob_key); if (ret) { pr_err("Unable to encrypt blob key: %i\n", ret); - return ret; + goto out; } - b->payload_len = get_unaligned_le32(&p->key_len); + put_unaligned_le32(p->key_len, &b->payload_len); p->blob_len = blen; - return 0; + ret = 0; + +out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + + return ret; } static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; if (b->fmt_version != DCP_BLOB_VERSION) { pr_err("DCP blob has bad version: %i, expected %i\n", @@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) goto out; } - ret = decrypt_blob_key(b->blob_key); + ret = decrypt_blob_key(b->blob_key, plain_blob_key); if (ret) { pr_err("Unable to decrypt blob key: %i\n", ret); goto out; } ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN, - b->blob_key, b->nonce, false); + plain_blob_key, b->nonce, false); if (ret) { pr_err("Unwrap of DCP payload failed: %i\n", ret); goto out; @@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) ret = 0; out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + return ret; } diff --git a/security/landlock/cred.c b/security/landlock/cred.c index 786af18c4a1ca84f95d0d37c41af6b77f091aaa8..db9fe7d906ba665f35dc0e63c4551ea79a866a27 100644 --- a/security/landlock/cred.c +++ b/security/landlock/cred.c @@ -14,8 +14,8 @@ #include "ruleset.h" #include "setup.h" -static int hook_cred_prepare(struct cred *const new, - const struct cred *const old, const gfp_t gfp) +static void hook_cred_transfer(struct cred *const new, + const struct cred *const old) { struct landlock_ruleset *const old_dom = landlock_cred(old)->domain; @@ -23,6 +23,12 @@ static int hook_cred_prepare(struct cred *const new, landlock_get_ruleset(old_dom); landlock_cred(new)->domain = old_dom; } +} + +static int hook_cred_prepare(struct cred *const new, + const struct cred *const old, const gfp_t gfp) +{ + hook_cred_transfer(new, old); return 0; } @@ -36,6 +42,7 @@ static void hook_cred_free(struct cred *const cred) static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, hook_cred_prepare), + LSM_HOOK_INIT(cred_transfer, hook_cred_transfer), LSM_HOOK_INIT(cred_free, hook_cred_free), }; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 32eb67fb3e42c0f0e88b70ee37bf9a3afcd8e7d4..b49c44869dc462720bc9110f4a3aa895edb06537 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -330,12 +330,12 @@ static int avc_add_xperms_decision(struct avc_node *node, { struct avc_xperms_decision_node *dest_xpd; - node->ae.xp_node->xp.len++; dest_xpd = avc_xperms_decision_alloc(src->used); if (!dest_xpd) return -ENOMEM; avc_copy_xperms_decision(&dest_xpd->xpd, src); list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head); + node->ae.xp_node->xp.len++; return 0; } @@ -907,7 +907,11 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, node->ae.avd.auditdeny &= ~perms; break; case AVC_CALLBACK_ADD_XPERMS: - avc_add_xperms_decision(node, xpd); + rc = avc_add_xperms_decision(node, xpd); + if (rc) { + avc_node_kill(node); + goto out_unlock; + } break; } avc_node_replace(node, orig); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 55c78c318ccd78b8dcec24886c0ff5e919c67b85..bfa61e005aace203b4e66f799285f22c33e08902 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3852,7 +3852,17 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { int rc = 0; - if (vma_is_initial_heap(vma)) { + /* + * We don't use the vma_is_initial_heap() helper as it has + * a history of problems and is currently broken on systems + * where there is no heap, e.g. brk == start_brk. Before + * replacing the conditional below with vma_is_initial_heap(), + * or something similar, please ensure that the logic is the + * same as what we have below or you have tested every possible + * corner case you can think to test. + */ + if (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && (vma_is_initial_stack(vma) || diff --git a/sound/core/seq/seq_ports.h b/sound/core/seq/seq_ports.h index b111382f697aa6c6ce7b9bc0d2ff68deda804855..9e36738c0dd049698d72abf31cbbb4b383f8b649 100644 --- a/sound/core/seq/seq_ports.h +++ b/sound/core/seq/seq_ports.h @@ -7,6 +7,7 @@ #define __SND_SEQ_PORTS_H #include +#include #include "seq_lock.h" /* list of 'exported' ports */ @@ -42,17 +43,6 @@ struct snd_seq_port_subs_info { int (*close)(void *private_data, struct snd_seq_port_subscribe *info); }; -/* context for converting from legacy control event to UMP packet */ -struct snd_seq_ump_midi2_bank { - bool rpn_set; - bool nrpn_set; - bool bank_set; - unsigned char cc_rpn_msb, cc_rpn_lsb; - unsigned char cc_nrpn_msb, cc_nrpn_lsb; - unsigned char cc_data_msb, cc_data_lsb; - unsigned char cc_bank_msb, cc_bank_lsb; -}; - struct snd_seq_client_port { struct snd_seq_addr addr; /* client/port number */ @@ -88,7 +78,7 @@ struct snd_seq_client_port { unsigned char ump_group; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) - struct snd_seq_ump_midi2_bank midi2_bank[16]; /* per channel */ + struct ump_cvt_to_ump_bank midi2_bank[16]; /* per channel */ #endif }; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index e90b27a135e6f14d4cdd5f3ce47c5dcf4788a42e..4dd540cbb1cbbcbf0df8bf9f563ca70ddfe2a4b8 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -368,7 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest, struct snd_seq_ump_event ev_cvt; const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump; - struct snd_seq_ump_midi2_bank *cc; + struct ump_cvt_to_ump_bank *cc; ev_cvt = *event; memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump)); @@ -789,28 +789,45 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event, return 1; } +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) +{ + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + /* set up the MIDI2 RPN/NRPN packet data from the parsed info */ -static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, - union snd_ump_midi2_msg *data, - unsigned char channel) +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *data, + unsigned char channel, + bool flush) { + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { data->rpn.status = UMP_MSG_STATUS_RPN; data->rpn.bank = cc->cc_rpn_msb; data->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { data->rpn.status = UMP_MSG_STATUS_NRPN; data->rpn.bank = cc->cc_nrpn_msb; data->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); data->rpn.channel = channel; - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert CC event to MIDI 2.0 UMP */ @@ -822,29 +839,39 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; unsigned char val = event->data.control.value & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; + int ret; /* process special CC's (bank/rpn/nrpn) */ switch (index) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_msb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_lsb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = val; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = val; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = val; - return 0; // skip + return fill_rpn(cc, data, channel, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = val; @@ -854,11 +881,9 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = val; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = val; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = status; @@ -887,7 +912,7 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char status) { unsigned char channel = event->data.control.channel & 0x0f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; data->pg.status = status; data->pg.channel = channel; @@ -924,8 +949,9 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, { unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; unsigned char msb, lsb; + int ret; msb = (event->data.control.value >> 7) & 0x7f; lsb = event->data.control.value & 0x7f; @@ -939,28 +965,27 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = lsb; return 0; // skip case UMP_CC_RPN_MSB: - cc->cc_rpn_msb = msb; - fallthrough; case UMP_CC_RPN_LSB: - cc->rpn_set = 1; + ret = fill_rpn(cc, data, channel, true); + cc->cc_rpn_msb = msb; cc->cc_rpn_lsb = lsb; - return 0; // skip + cc->rpn_set = 1; + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: - cc->cc_nrpn_msb = msb; - fallthrough; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); + cc->cc_nrpn_msb = msb; cc->nrpn_set = 1; cc->cc_nrpn_lsb = lsb; - return 0; // skip + return ret; case UMP_CC_DATA: - cc->cc_data_msb = msb; - fallthrough; case UMP_CC_DATA_LSB: + cc->cc_data_msb_set = cc->cc_data_lsb_set = 1; + cc->cc_data_msb = msb; cc->cc_data_lsb = lsb; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = UMP_MSG_STATUS_CC; @@ -1192,44 +1217,53 @@ static int cvt_sysex_to_ump(struct snd_seq_client *dest, { struct snd_seq_ump_event ev_cvt; unsigned char status; - u8 buf[6], *xbuf; + u8 buf[8], *xbuf; int offset = 0; int len, err; + bool finished = false; if (!snd_seq_ev_is_variable(event)) return 0; setup_ump_event(&ev_cvt, event); - for (;;) { + while (!finished) { len = snd_seq_expand_var_event_at(event, sizeof(buf), buf, offset); if (len <= 0) break; - if (WARN_ON(len > 6)) + if (WARN_ON(len > sizeof(buf))) break; - offset += len; + xbuf = buf; + status = UMP_SYSEX_STATUS_CONTINUE; + /* truncate the sysex start-marker */ if (*xbuf == UMP_MIDI1_MSG_SYSEX_START) { status = UMP_SYSEX_STATUS_START; - xbuf++; len--; - if (len > 0 && xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + offset++; + xbuf++; + } + + /* if the last of this packet or the 1st byte of the next packet + * is the end-marker, finish the transfer with this packet + */ + if (len > 0 && len < 8 && + xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + if (status == UMP_SYSEX_STATUS_START) status = UMP_SYSEX_STATUS_SINGLE; - len--; - } - } else { - if (xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + else status = UMP_SYSEX_STATUS_END; - len--; - } else { - status = UMP_SYSEX_STATUS_CONTINUE; - } + len--; + finished = true; } + + len = min(len, 6); fill_sysex7_ump(dest_port, ev_cvt.ump, status, xbuf, len); err = __snd_seq_deliver_single_event(dest, dest_port, (struct snd_seq_event *)&ev_cvt, atomic, hop); if (err < 0) return err; + offset += len; } return 0; } diff --git a/sound/core/timer.c b/sound/core/timer.c index d104adc75a8b0ebad1a43687b9006a39d7af4658..71a07c1662f5c4fc33dc11799225c710d03ef309 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_timer_instance *timeri, /* check the actual time for the start tick; * bail out as error if it's way too low (< 100us) */ - if (start) { + if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) { if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) return -EINVAL; } diff --git a/sound/core/ump.c b/sound/core/ump.c index 3f61220c23b4ec028faddd63bff776a91cc8067c..0f0d7e895c5aa920f7533832de8440a6621a7a3a 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -733,6 +733,12 @@ static void fill_fb_info(struct snd_ump_endpoint *ump, info->block_id, info->direction, info->active, info->first_group, info->num_groups, info->midi_ci_version, info->sysex8_streams, info->flags); + + if ((info->flags & SNDRV_UMP_BLOCK_IS_MIDI1) && info->num_groups != 1) { + info->num_groups = 1; + ump_dbg(ump, "FB %d: corrected groups to 1 for MIDI1\n", + info->block_id); + } } /* check whether the FB info gets updated by the current message */ @@ -806,6 +812,13 @@ static int ump_handle_fb_name_msg(struct snd_ump_endpoint *ump, if (!fb) return -ENODEV; + if (ump->parsed && + (ump->info.flags & SNDRV_UMP_EP_INFO_STATIC_BLOCKS)) { + ump_dbg(ump, "Skipping static FB name update (blk#%d)\n", + fb->info.block_id); + return 0; + } + ret = ump_append_string(ump, fb->info.name, sizeof(fb->info.name), buf->raw, 3); /* notify the FB name update to sequencer, too */ diff --git a/sound/core/ump_convert.c b/sound/core/ump_convert.c index f67c44c83fde445258c59f34897b73a913c17cf1..0fe13d03165686de984f43d01a716fc31896f56d 100644 --- a/sound/core/ump_convert.c +++ b/sound/core/ump_convert.c @@ -287,25 +287,42 @@ static int cvt_legacy_system_to_ump(struct ump_cvt_to_ump *cvt, return 4; } -static void fill_rpn(struct ump_cvt_to_ump_bank *cc, - union snd_ump_midi2_msg *midi2) +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) { + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *midi2, + bool flush) +{ + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { midi2->rpn.status = UMP_MSG_STATUS_RPN; midi2->rpn.bank = cc->cc_rpn_msb; midi2->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { midi2->rpn.status = UMP_MSG_STATUS_NRPN; midi2->rpn.bank = cc->cc_nrpn_msb; midi2->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + midi2->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert to a MIDI 1.0 Channel Voice message */ @@ -318,6 +335,7 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, struct ump_cvt_to_ump_bank *cc; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)data; unsigned char status, channel; + int ret; BUILD_BUG_ON(sizeof(union snd_ump_midi1_msg) != 4); BUILD_BUG_ON(sizeof(union snd_ump_midi2_msg) != 8); @@ -358,24 +376,33 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, case UMP_MSG_STATUS_CC: switch (buf[1]) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_msb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_lsb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = buf[2]; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = buf[2]; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = buf[2]; - return 0; // skip + return fill_rpn(cc, midi2, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = buf[2]; @@ -385,12 +412,9 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, cc->cc_bank_lsb = buf[2]; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = buf[2]; - if (cc->rpn_set || cc->nrpn_set) - fill_rpn(cc, midi2); - else - return 0; // skip - break; + return fill_rpn(cc, midi2, false); default: midi2->cc.index = buf[1]; midi2->cc.data = upscale_7_to_32bit(buf[2]); diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index d35d0a420ee088ade28cbe3212c0e9d6d3199594..c827d7d8d8003564f48f091134ef2666dd06a852 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -77,6 +77,8 @@ // overrun. Actual device can skip more, then this module stops the packet streaming. #define IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES 5 +static void pcm_period_work(struct work_struct *work); + /** * amdtp_stream_init - initialize an AMDTP stream structure * @s: the AMDTP stream to initialize @@ -105,6 +107,7 @@ int amdtp_stream_init(struct amdtp_stream *s, struct fw_unit *unit, s->flags = flags; s->context = ERR_PTR(-1); mutex_init(&s->mutex); + INIT_WORK(&s->period_work, pcm_period_work); s->packet_index = 0; init_waitqueue_head(&s->ready_wait); @@ -347,6 +350,7 @@ EXPORT_SYMBOL(amdtp_stream_get_max_payload); */ void amdtp_stream_pcm_prepare(struct amdtp_stream *s) { + cancel_work_sync(&s->period_work); s->pcm_buffer_pointer = 0; s->pcm_period_pointer = 0; } @@ -611,19 +615,21 @@ static void update_pcm_pointers(struct amdtp_stream *s, // The program in user process should periodically check the status of intermediate // buffer associated to PCM substream to process PCM frames in the buffer, instead // of receiving notification of period elapsed by poll wait. - if (!pcm->runtime->no_period_wakeup) { - if (in_softirq()) { - // In software IRQ context for 1394 OHCI. - snd_pcm_period_elapsed(pcm); - } else { - // In process context of ALSA PCM application under acquired lock of - // PCM substream. - snd_pcm_period_elapsed_under_stream_lock(pcm); - } - } + if (!pcm->runtime->no_period_wakeup) + queue_work(system_highpri_wq, &s->period_work); } } +static void pcm_period_work(struct work_struct *work) +{ + struct amdtp_stream *s = container_of(work, struct amdtp_stream, + period_work); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); + + if (pcm) + snd_pcm_period_elapsed(pcm); +} + static int queue_packet(struct amdtp_stream *s, struct fw_iso_packet *params, bool sched_irq) { @@ -1180,8 +1186,7 @@ static void process_rx_packets(struct fw_iso_context *context, u32 tstamp, size_ (void)fw_card_read_cycle_time(fw_parent_device(s->unit)->card, &curr_cycle_time); for (i = 0; i < packets; ++i) { - DEFINE_FLEX(struct fw_iso_packet, template, header, - header_length, CIP_HEADER_QUADLETS); + DEFINE_RAW_FLEX(struct fw_iso_packet, template, header, CIP_HEADER_QUADLETS); bool sched_irq = false; build_it_pkt_header(s, desc->cycle, template, pkt_header_length, @@ -1850,11 +1855,14 @@ unsigned long amdtp_domain_stream_pcm_pointer(struct amdtp_domain *d, { struct amdtp_stream *irq_target = d->irq_target; - // Process isochronous packets queued till recent isochronous cycle to handle PCM frames. if (irq_target && amdtp_stream_running(irq_target)) { - // In software IRQ context, the call causes dead-lock to disable the tasklet - // synchronously. - if (!in_softirq()) + // use wq to prevent AB/BA deadlock competition for + // substream lock: + // fw_iso_context_flush_completions() acquires + // lock by ohci_flush_iso_completions(), + // amdtp-stream process_rx_packets() attempts to + // acquire same lock by snd_pcm_elapsed() + if (current_work() != &s->period_work) fw_iso_context_flush_completions(irq_target->context); } @@ -1910,6 +1918,7 @@ static void amdtp_stream_stop(struct amdtp_stream *s) return; } + cancel_work_sync(&s->period_work); fw_iso_context_stop(s->context); fw_iso_context_destroy(s->context); s->context = ERR_PTR(-1); diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index a1ed2e80f91a7cc07fecf55288a20a240d0900da..775db3fc4959f57d006a247d5f5d6d74586a110c 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -191,6 +191,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; + struct work_struct period_work; snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; unsigned int pcm_frame_multiplier; diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 4b411ed8c3fe0e794a86c13f33351cb7be27b2b1..d68bf7591d90c19a601499d9c20612d8c1a12603 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -133,19 +133,8 @@ static const struct reg_sequence cs35l41_hda_mute[] = { { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, // AMP_HPF_PCM_EN = 1, AMP_VOL_PCM Mute }; -static void cs35l41_add_controls(struct cs35l41_hda *cs35l41) -{ - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l41->amp_name; - info.fw_type = cs35l41->firmware_type; - info.card = cs35l41->codec->card; - - hda_cs_dsp_add_controls(&cs35l41->cs_dsp, &info); -} - static const struct cs_dsp_client_ops client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l41_request_tuning_param_file(struct cs35l41_hda *cs35l41, char *tuning_filename, @@ -603,8 +592,6 @@ static int cs35l41_init_dsp(struct cs35l41_hda *cs35l41) if (ret) goto err; - cs35l41_add_controls(cs35l41); - cs35l41_hda_apply_calibration(cs35l41); err: diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 96d3f13c5abf9f24636294a8262b54b579e08269..a9dfd62637cf4cdbab2cca065751d459bdf34ab3 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -413,7 +413,7 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) } static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, @@ -559,18 +559,6 @@ static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmw kfree(coeff_filename); } -static void cs35l56_hda_create_dsp_controls_work(struct work_struct *work) -{ - struct cs35l56_hda *cs35l56 = container_of(work, struct cs35l56_hda, control_work); - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l56->amp_name; - info.fw_type = HDA_CS_DSP_FW_MISC; - info.card = cs35l56->codec->card; - - hda_cs_dsp_add_controls(&cs35l56->cs_dsp, &info); -} - static void cs35l56_hda_apply_calibration(struct cs35l56_hda *cs35l56) { int ret; @@ -595,26 +583,15 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) char *wmfw_filename = NULL; unsigned int preloaded_fw_ver; bool firmware_missing; - bool add_dsp_controls_required = false; int ret; - /* - * control_work must be flushed before proceeding, but we can't do that - * here as it would create a deadlock on controls_rwsem so it must be - * performed before queuing dsp_work. - */ - WARN_ON_ONCE(work_busy(&cs35l56->control_work)); - /* * Prepare for a new DSP power-up. If the DSP has had firmware * downloaded previously then it needs to be powered down so that it - * can be updated and if hadn't been patched before then the controls - * will need to be added once firmware download succeeds. + * can be updated. */ if (cs35l56->base.fw_patched) cs_dsp_power_down(&cs35l56->cs_dsp); - else - add_dsp_controls_required = true; cs35l56->base.fw_patched = false; @@ -698,15 +675,6 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) CS35L56_FIRMWARE_MISSING); cs35l56->base.fw_patched = true; - /* - * Adding controls is deferred to prevent a lock inversion - ALSA takes - * the controls_rwsem when adding a control, the get() / put() - * functions of a control are called holding controls_rwsem and those - * that depend on running firmware wait for dsp_work() to complete. - */ - if (add_dsp_controls_required) - queue_work(system_long_wq, &cs35l56->control_work); - ret = cs_dsp_run(&cs35l56->cs_dsp); if (ret) dev_dbg(cs35l56->base.dev, "%s: cs_dsp_run ret %d\n", __func__, ret); @@ -753,7 +721,6 @@ static int cs35l56_hda_bind(struct device *dev, struct device *master, void *mas strscpy(comp->name, dev_name(dev), sizeof(comp->name)); comp->playback_hook = cs35l56_hda_playback_hook; - flush_work(&cs35l56->control_work); queue_work(system_long_wq, &cs35l56->dsp_work); cs35l56_hda_create_controls(cs35l56); @@ -775,7 +742,6 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void * struct hda_component *comp; cancel_work_sync(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->control_work); cs35l56_hda_remove_controls(cs35l56); @@ -806,7 +772,6 @@ static int cs35l56_hda_system_suspend(struct device *dev) struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); cs35l56_hda_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->control_work); if (cs35l56->playing) cs35l56_hda_pause(cs35l56); @@ -1026,7 +991,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) dev_set_drvdata(cs35l56->base.dev, cs35l56); INIT_WORK(&cs35l56->dsp_work, cs35l56_hda_dsp_work); - INIT_WORK(&cs35l56->control_work, cs35l56_hda_create_dsp_controls_work); ret = cs35l56_hda_read_acpi(cs35l56, hid, id); if (ret) diff --git a/sound/pci/hda/cs35l56_hda.h b/sound/pci/hda/cs35l56_hda.h index c40d159507c26d51568aabdd75b6c37facceebbe..38d94fb213a50a999136707a91893798b76357b8 100644 --- a/sound/pci/hda/cs35l56_hda.h +++ b/sound/pci/hda/cs35l56_hda.h @@ -23,7 +23,6 @@ struct cs35l56_hda { struct cs35l56_base base; struct hda_codec *codec; struct work_struct dsp_work; - struct work_struct control_work; int index; const char *system_name; diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index c2d0109866e62e5a93f915ad202ce71c0f84d6b9..68c883f202ca5bac763b0b07a81084067df4ec60 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -28,7 +28,7 @@ #else #define AZX_DCAPS_I915_COMPONENT 0 /* NOP */ #endif -/* 14 unused */ +#define AZX_DCAPS_AMD_ALLOC_FIX (1 << 14) /* AMD allocation workaround */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ #define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index f64d9dc197a31af31a2cc80a5148830654fd9fb9..9cff87dfbecbb1fffa35a3d8819ac82c51571bb5 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4955,6 +4955,69 @@ void snd_hda_gen_stream_pm(struct hda_codec *codec, hda_nid_t nid, bool on) } EXPORT_SYMBOL_GPL(snd_hda_gen_stream_pm); +/* forcibly mute the speaker output without caching; return true if updated */ +static bool force_mute_output_path(struct hda_codec *codec, hda_nid_t nid) +{ + if (!nid) + return false; + if (!nid_has_mute(codec, nid, HDA_OUTPUT)) + return false; /* no mute, skip */ + if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) & + snd_hda_codec_amp_read(codec, nid, 1, HDA_OUTPUT, 0) & + HDA_AMP_MUTE) + return false; /* both channels already muted, skip */ + + /* direct amp update without caching */ + snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, + AC_AMP_SET_OUTPUT | AC_AMP_SET_LEFT | + AC_AMP_SET_RIGHT | HDA_AMP_MUTE); + return true; +} + +/** + * snd_hda_gen_shutup_speakers - Forcibly mute the speaker outputs + * @codec: the HDA codec + * + * Forcibly mute the speaker outputs, to be called at suspend or shutdown. + * + * The mute state done by this function isn't cached, hence the original state + * will be restored at resume. + * + * Return true if the mute state has been changed. + */ +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + const int *paths; + const struct nid_path *path; + int i, p, num_paths; + bool updated = false; + + /* if already powered off, do nothing */ + if (!snd_hdac_is_power_on(&codec->core)) + return false; + + if (spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) { + paths = spec->out_paths; + num_paths = spec->autocfg.line_outs; + } else { + paths = spec->speaker_paths; + num_paths = spec->autocfg.speaker_outs; + } + + for (i = 0; i < num_paths; i++) { + path = snd_hda_get_path_from_idx(codec, paths[i]); + if (!path) + continue; + for (p = 0; p < path->depth; p++) + if (force_mute_output_path(codec, path->path[p])) + updated = true; + } + + return updated; +} +EXPORT_SYMBOL_GPL(snd_hda_gen_shutup_speakers); + /** * snd_hda_gen_parse_auto_config - Parse the given BIOS configuration and * set up the hda_gen_spec diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 8f5ecf740c4917051ee9c2f0a5d0666109f70dc7..08544601b4ce29865035734fa59901159dfbeff1 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -353,5 +353,6 @@ int snd_hda_gen_add_mute_led_cdev(struct hda_codec *codec, int snd_hda_gen_add_micmute_led_cdev(struct hda_codec *codec, int (*callback)(struct led_classdev *, enum led_brightness)); +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec); #endif /* __SOUND_HDA_GENERIC_H */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b33602e64d174c8af1bf196ee5b0ce90668f9681..97d33a48ff17c80628cef26f1e690fdcf57dd0d8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86 /* for snoop control */ +#include #include #include #endif @@ -306,7 +307,7 @@ enum { /* quirks for ATI HDMI with snoop off */ #define AZX_DCAPS_PRESET_ATI_HDMI_NS \ - (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF) + (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_AMD_ALLOC_FIX) /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ @@ -1702,6 +1703,13 @@ static void azx_check_snoop_available(struct azx *chip) if (chip->driver_caps & AZX_DCAPS_SNOOP_OFF) snoop = false; +#ifdef CONFIG_X86 + /* check the presence of DMA ops (i.e. IOMMU), disable snoop conditionally */ + if ((chip->driver_caps & AZX_DCAPS_AMD_ALLOC_FIX) && + !get_dma_ops(chip->card->dev)) + snoop = false; +#endif + chip->snoop = snoop; if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 17389a3801bd12976f3552e3e89cbbf4cd18ebed..f030669243f9a501e3a65a2d991f770fa1b8f4d4 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,12 +21,6 @@ #include "hda_jack.h" #include "hda_generic.h" -enum { - CX_HEADSET_NOPRESENT = 0, - CX_HEADSET_PARTPRESENT, - CX_HEADSET_ALLPRESENT, -}; - struct conexant_spec { struct hda_gen_spec gen; @@ -48,7 +42,6 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - unsigned int headset_present_flag; bool is_cx8070_sn6140; }; @@ -212,6 +205,8 @@ static void cx_auto_shutdown(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; + snd_hda_gen_shutup_speakers(codec); + /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); @@ -250,48 +245,19 @@ static void cx_process_headset_plugin(struct hda_codec *codec) } } -static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +static void cx_update_headset_mic_vref(struct hda_codec *codec, struct hda_jack_callback *event) { - unsigned int phone_present, mic_persent, phone_tag, mic_tag; - struct conexant_spec *spec = codec->spec; + unsigned int mic_present; /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, * the node 19 can only be config to microphone or disabled. * Check hp&mic tag to process headset pulgin&plugout. */ - phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || - (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { - phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); - if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ - spec->headset_present_flag = CX_HEADSET_NOPRESENT; - snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); - return; - } - if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { - spec->headset_present_flag = CX_HEADSET_PARTPRESENT; - } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { - mic_persent = snd_hda_codec_read(codec, 0x19, 0, - AC_VERB_GET_PIN_SENSE, 0x0); - /* headset is present */ - if ((phone_present & AC_PINSENSE_PRESENCE) && - (mic_persent & AC_PINSENSE_PRESENCE)) { - cx_process_headset_plugin(codec); - spec->headset_present_flag = CX_HEADSET_ALLPRESENT; - } - } - } -} - -static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) -{ - struct conexant_spec *spec = codec->spec; - - if (spec->is_cx8070_sn6140) - cx_update_headset_mic_vref(codec, res); - - snd_hda_jack_unsol_event(codec, res); + mic_present = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(mic_present & AC_PINSENSE_PRESENCE)) /* mic plugout */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + else + cx_process_headset_plugin(codec); } static int cx_auto_suspend(struct hda_codec *codec) @@ -305,7 +271,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = cx_jack_unsol_event, + .unsol_event = snd_hda_jack_unsol_event, .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, }; @@ -1163,7 +1129,7 @@ static int patch_conexant_auto(struct hda_codec *codec) case 0x14f11f86: case 0x14f11f87: spec->is_cx8070_sn6140 = true; - spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_jack_detect_enable_callback(codec, 0x19, cx_update_headset_mic_vref); break; } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 707d203ba6527654e3a510112a3658a084aa7c4c..78042ac2b71f217d671dff14f9af8b5f1b48f665 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1989,6 +1989,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) } static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x83e2, "HP EliteDesk 800 G4", 1), + SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1), SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c3a86a99f8c6d8f6a0f7b3df809f117082e68514..d022a25635f9bf7cf751fb6f967005df98f08c47 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11,15 +11,18 @@ */ #include +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -583,7 +586,6 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: - case 0x10ec0257: case 0x19e58326: case 0x10ec0283: case 0x10ec0285: @@ -4800,6 +4802,8 @@ static void alc298_fixup_samsung_amp(struct hda_codec *codec, } } +#include "samsung_helper.c" + #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) @@ -6854,6 +6858,86 @@ static void comp_generic_fixup(struct hda_codec *cdc, int action, const char *bu } } +static void cs35lxx_autodet_fixup(struct hda_codec *cdc, + const struct hda_fixup *fix, + int action) +{ + struct device *dev = hda_codec_dev(cdc); + struct acpi_device *adev; + struct fwnode_handle *fwnode __free(fwnode_handle) = NULL; + const char *bus = NULL; + static const struct { + const char *hid; + const char *name; + } acpi_ids[] = {{ "CSC3554", "cs35l54-hda" }, + { "CSC3556", "cs35l56-hda" }, + { "CSC3557", "cs35l57-hda" }}; + char *match; + int i, count = 0, count_devindex = 0; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + for (i = 0; i < ARRAY_SIZE(acpi_ids); ++i) { + adev = acpi_dev_get_first_match_dev(acpi_ids[i].hid, NULL, -1); + if (adev) + break; + } + if (!adev) { + dev_err(dev, "Failed to find ACPI entry for a Cirrus Amp\n"); + return; + } + + count = i2c_acpi_client_count(adev); + if (count > 0) { + bus = "i2c"; + } else { + count = acpi_spi_count_resources(adev); + if (count > 0) + bus = "spi"; + } + + fwnode = fwnode_handle_get(acpi_fwnode_handle(adev)); + acpi_dev_put(adev); + + if (!bus) { + dev_err(dev, "Did not find any buses for %s\n", acpi_ids[i].hid); + return; + } + + if (!fwnode) { + dev_err(dev, "Could not get fwnode for %s\n", acpi_ids[i].hid); + return; + } + + /* + * When available the cirrus,dev-index property is an accurate + * count of the amps in a system and is used in preference to + * the count of bus devices that can contain additional address + * alias entries. + */ + count_devindex = fwnode_property_count_u32(fwnode, "cirrus,dev-index"); + if (count_devindex > 0) + count = count_devindex; + + match = devm_kasprintf(dev, GFP_KERNEL, "-%%s:00-%s.%%d", acpi_ids[i].name); + if (!match) + return; + dev_info(dev, "Found %d %s on %s (%s)\n", count, acpi_ids[i].hid, bus, match); + comp_generic_fixup(cdc, action, bus, acpi_ids[i].hid, match, count); + + break; + case HDA_FIXUP_ACT_FREE: + /* + * Pass the action on to comp_generic_fixup() so that + * hda_component_manager functions can be called in just once + * place. In this context the bus, hid, match_str or count + * values do not need to be calculated. + */ + comp_generic_fixup(cdc, action, NULL, NULL, NULL, 0); + break; + } +} + static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { comp_generic_fixup(cdc, action, "i2c", "CSC3551", "-%s:00-cs35l41-hda.%d", 2); @@ -7429,6 +7513,7 @@ enum { ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, ALC298_FIXUP_SAMSUNG_AMP, + ALC298_FIXUP_SAMSUNG_AMP2, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, @@ -7525,6 +7610,7 @@ enum { ALC256_FIXUP_CHROME_BOOK, ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, ALC287_FIXUP_LENOVO_SSID_17AA3820, + ALCXXX_FIXUP_CS35LXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9055,6 +9141,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET }, + [ALC298_FIXUP_SAMSUNG_AMP2] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_samsung_amp2 + }, [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9850,6 +9940,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_ssid_17aa3820, }, + [ALCXXX_FIXUP_CS35LXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35lxx_autodet_fixup, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9865,6 +9959,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), @@ -10263,6 +10358,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d08, "HP EliteBook 1045 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite x360 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8c, "HP EliteBook 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8d, "HP Elite x360 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8e, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8f, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d90, "HP EliteBook 860 16 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALCXXX_FIXUP_CS35LXX), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -10359,10 +10465,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), - SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), @@ -10406,6 +10512,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1ca, "Samsung Galaxy Book3 Pro 360 (NP960QFG-KB1US)", ALC298_FIXUP_SAMSUNG_AMP2), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), @@ -10669,6 +10776,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. @@ -10843,6 +10951,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, {.id = ALC298_FIXUP_SAMSUNG_AMP, .name = "alc298-samsung-amp"}, + {.id = ALC298_FIXUP_SAMSUNG_AMP2, .name = "alc298-samsung-amp2"}, {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, diff --git a/sound/pci/hda/samsung_helper.c b/sound/pci/hda/samsung_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..a40175b690157bd244fbc272909ca754acb1cde4 --- /dev/null +++ b/sound/pci/hda/samsung_helper.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Helper functions for Samsung Galaxy Book3 audio initialization */ + +struct alc298_samsung_coeff_fixup_desc { + unsigned char coeff_idx; + unsigned short coeff_value; +}; + +struct alc298_samsung_coeff_seq_desc { + unsigned short coeff_0x23; + unsigned short coeff_0x24; + unsigned short coeff_0x25; + unsigned short coeff_0x26; +}; + + +static inline void alc298_samsung_write_coef_pack2(struct hda_codec *codec, + const struct alc298_samsung_coeff_seq_desc *seq) +{ + int i; + + for (i = 0; i < 100; i++) { + if ((alc_read_coef_idx(codec, 0x26) & 0x0010) == 0) + break; + + usleep_range(500, 1000); + } + + alc_write_coef_idx(codec, 0x23, seq->coeff_0x23); + alc_write_coef_idx(codec, 0x24, seq->coeff_0x24); + alc_write_coef_idx(codec, 0x25, seq->coeff_0x25); + alc_write_coef_idx(codec, 0x26, seq->coeff_0x26); +} + +static inline void alc298_samsung_write_coef_pack_seq( + struct hda_codec *codec, + unsigned char target, + const struct alc298_samsung_coeff_seq_desc seq[], + int count) +{ + alc_write_coef_idx(codec, 0x22, target); + for (int i = 0; i < count; i++) + alc298_samsung_write_coef_pack2(codec, &seq[i]); +} + +static void alc298_fixup_samsung_amp2(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + int i; + static const struct alc298_samsung_coeff_fixup_desc fixups1[] = { + { 0x99, 0x8000 }, { 0x82, 0x4408 }, { 0x32, 0x3f00 }, { 0x0e, 0x6f80 }, + { 0x10, 0x0e21 }, { 0x55, 0x8000 }, { 0x08, 0x2fcf }, { 0x08, 0x2fcf }, + { 0x2d, 0xc020 }, { 0x19, 0x0017 }, { 0x50, 0x1000 }, { 0x0e, 0x6f80 }, + { 0x08, 0x2fcf }, { 0x80, 0x0011 }, { 0x2b, 0x0c10 }, { 0x2d, 0xc020 }, + { 0x03, 0x0042 }, { 0x0f, 0x0062 }, { 0x08, 0x2fcf }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x38[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0001, 0xb011 }, { 0x201d, 0x0000, 0x0001, 0xb011 }, + { 0x201f, 0x0000, 0x00fe, 0xb011 }, { 0x2021, 0x0000, 0x0000, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x2399, 0x0000, 0x0003, 0xb011 }, + { 0x23a4, 0x0000, 0x00b5, 0xb011 }, { 0x23a5, 0x0000, 0x0001, 0xb011 }, + { 0x23ba, 0x0000, 0x0094, 0xb011 }, { 0x2100, 0x00d0, 0x950e, 0xb017 }, + { 0x2104, 0x0061, 0xd4e2, 0xb017 }, { 0x2108, 0x00d0, 0x950e, 0xb017 }, + { 0x210c, 0x0075, 0xf4e2, 0xb017 }, { 0x2110, 0x00b4, 0x4b0d, 0xb017 }, + { 0x2114, 0x000a, 0x1000, 0xb017 }, { 0x2118, 0x0015, 0x2000, 0xb017 }, + { 0x211c, 0x000a, 0x1000, 0xb017 }, { 0x2120, 0x0075, 0xf4e2, 0xb017 }, + { 0x2124, 0x00b4, 0x4b0d, 0xb017 }, { 0x2128, 0x0000, 0x0010, 0xb017 }, + { 0x212c, 0x0000, 0x0000, 0xb017 }, { 0x2130, 0x0000, 0x0000, 0xb017 }, + { 0x2134, 0x0000, 0x0000, 0xb017 }, { 0x2138, 0x0000, 0x0000, 0xb017 }, + { 0x213c, 0x0000, 0x0010, 0xb017 }, { 0x2140, 0x0000, 0x0000, 0xb017 }, + { 0x2144, 0x0000, 0x0000, 0xb017 }, { 0x2148, 0x0000, 0x0000, 0xb017 }, + { 0x214c, 0x0000, 0x0000, 0xb017 }, { 0x2150, 0x0000, 0x0010, 0xb017 }, + { 0x2154, 0x0000, 0x0000, 0xb017 }, { 0x2158, 0x0000, 0x0000, 0xb017 }, + { 0x215c, 0x0000, 0x0000, 0xb017 }, { 0x2160, 0x0000, 0x0000, 0xb017 }, + { 0x2164, 0x0000, 0x0010, 0xb017 }, { 0x2168, 0x0000, 0x0000, 0xb017 }, + { 0x216c, 0x0000, 0x0000, 0xb017 }, { 0x2170, 0x0000, 0x0000, 0xb017 }, + { 0x2174, 0x0000, 0x0000, 0xb017 }, { 0x2178, 0x0000, 0x0010, 0xb017 }, + { 0x217c, 0x0000, 0x0000, 0xb017 }, { 0x2180, 0x0000, 0x0000, 0xb017 }, + { 0x2184, 0x0000, 0x0000, 0xb017 }, { 0x2188, 0x0000, 0x0000, 0xb017 }, + { 0x218c, 0x0064, 0x5800, 0xb017 }, { 0x2190, 0x00c8, 0xb000, 0xb017 }, + { 0x2194, 0x0064, 0x5800, 0xb017 }, { 0x2198, 0x003d, 0x5be7, 0xb017 }, + { 0x219c, 0x0054, 0x060a, 0xb017 }, { 0x21a0, 0x00c8, 0xa310, 0xb017 }, + { 0x21a4, 0x0029, 0x4de5, 0xb017 }, { 0x21a8, 0x0032, 0x420c, 0xb017 }, + { 0x21ac, 0x0029, 0x4de5, 0xb017 }, { 0x21b0, 0x00fa, 0xe50c, 0xb017 }, + { 0x21b4, 0x0000, 0x0010, 0xb017 }, { 0x21b8, 0x0000, 0x0000, 0xb017 }, + { 0x21bc, 0x0000, 0x0000, 0xb017 }, { 0x21c0, 0x0000, 0x0000, 0xb017 }, + { 0x21c4, 0x0000, 0x0000, 0xb017 }, { 0x21c8, 0x0056, 0xc50f, 0xb017 }, + { 0x21cc, 0x007b, 0xd7e1, 0xb017 }, { 0x21d0, 0x0077, 0xa70e, 0xb017 }, + { 0x21d4, 0x00e0, 0xbde1, 0xb017 }, { 0x21d8, 0x0032, 0x530e, 0xb017 }, + { 0x2204, 0x00fb, 0x7e0f, 0xb017 }, { 0x2208, 0x000b, 0x02e1, 0xb017 }, + { 0x220c, 0x00fb, 0x7e0f, 0xb017 }, { 0x2210, 0x00d5, 0x17e1, 0xb017 }, + { 0x2214, 0x00c0, 0x130f, 0xb017 }, { 0x2218, 0x00e5, 0x0a00, 0xb017 }, + { 0x221c, 0x00cb, 0x1500, 0xb017 }, { 0x2220, 0x00e5, 0x0a00, 0xb017 }, + { 0x2224, 0x00d5, 0x17e1, 0xb017 }, { 0x2228, 0x00c0, 0x130f, 0xb017 }, + { 0x222c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2230, 0x0017, 0x48e2, 0xb017 }, + { 0x2234, 0x00f5, 0xdb0e, 0xb017 }, { 0x2238, 0x00ef, 0x5ce2, 0xb017 }, + { 0x223c, 0x00c1, 0xcc0d, 0xb017 }, { 0x2240, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2244, 0x0017, 0x48e2, 0xb017 }, { 0x2248, 0x00f5, 0xdb0e, 0xb017 }, + { 0x224c, 0x00ef, 0x5ce2, 0xb017 }, { 0x2250, 0x00c1, 0xcc0d, 0xb017 }, + { 0x2254, 0x00f5, 0xdb0e, 0xb017 }, { 0x2258, 0x0017, 0x48e2, 0xb017 }, + { 0x225c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2260, 0x00ef, 0x5ce2, 0xb017 }, + { 0x2264, 0x00c1, 0xcc0d, 0xb017 }, { 0x2268, 0x00f5, 0xdb0e, 0xb017 }, + { 0x226c, 0x0017, 0x48e2, 0xb017 }, { 0x2270, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2274, 0x00ef, 0x5ce2, 0xb017 }, { 0x2278, 0x00c1, 0xcc0d, 0xb017 }, + { 0x227c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2280, 0x0017, 0x48e2, 0xb017 }, + { 0x2284, 0x00f5, 0xdb0e, 0xb017 }, { 0x2288, 0x00ef, 0x5ce2, 0xb017 }, + { 0x228c, 0x00c1, 0xcc0d, 0xb017 }, { 0x22cc, 0x00e8, 0x8d00, 0xb017 }, + { 0x22d0, 0x0000, 0x0000, 0xb017 }, { 0x22d4, 0x0018, 0x72ff, 0xb017 }, + { 0x22d8, 0x00ce, 0x25e1, 0xb017 }, { 0x22dc, 0x002f, 0xe40e, 0xb017 }, + { 0x238e, 0x0000, 0x0099, 0xb011 }, { 0x238f, 0x0000, 0x0011, 0xb011 }, + { 0x2390, 0x0000, 0x0056, 0xb011 }, { 0x2391, 0x0000, 0x0004, 0xb011 }, + { 0x2392, 0x0000, 0x00bb, 0xb011 }, { 0x2393, 0x0000, 0x006d, 0xb011 }, + { 0x2394, 0x0000, 0x0010, 0xb011 }, { 0x2395, 0x0000, 0x0064, 0xb011 }, + { 0x2396, 0x0000, 0x00b6, 0xb011 }, { 0x2397, 0x0000, 0x0028, 0xb011 }, + { 0x2398, 0x0000, 0x000b, 0xb011 }, { 0x239a, 0x0000, 0x0099, 0xb011 }, + { 0x239b, 0x0000, 0x000d, 0xb011 }, { 0x23a6, 0x0000, 0x0064, 0xb011 }, + { 0x23a7, 0x0000, 0x0078, 0xb011 }, { 0x23b9, 0x0000, 0x0000, 0xb011 }, + { 0x23e0, 0x0000, 0x0021, 0xb011 }, { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x39[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0002, 0xb011 }, { 0x201d, 0x0000, 0x0002, 0xb011 }, + { 0x201f, 0x0000, 0x00fd, 0xb011 }, { 0x2021, 0x0000, 0x0001, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x2399, 0x0000, 0x0003, 0xb011 }, + { 0x23a4, 0x0000, 0x00b5, 0xb011 }, { 0x23a5, 0x0000, 0x0001, 0xb011 }, + { 0x23ba, 0x0000, 0x0094, 0xb011 }, { 0x2100, 0x00d0, 0x950e, 0xb017 }, + { 0x2104, 0x0061, 0xd4e2, 0xb017 }, { 0x2108, 0x00d0, 0x950e, 0xb017 }, + { 0x210c, 0x0075, 0xf4e2, 0xb017 }, { 0x2110, 0x00b4, 0x4b0d, 0xb017 }, + { 0x2114, 0x000a, 0x1000, 0xb017 }, { 0x2118, 0x0015, 0x2000, 0xb017 }, + { 0x211c, 0x000a, 0x1000, 0xb017 }, { 0x2120, 0x0075, 0xf4e2, 0xb017 }, + { 0x2124, 0x00b4, 0x4b0d, 0xb017 }, { 0x2128, 0x0000, 0x0010, 0xb017 }, + { 0x212c, 0x0000, 0x0000, 0xb017 }, { 0x2130, 0x0000, 0x0000, 0xb017 }, + { 0x2134, 0x0000, 0x0000, 0xb017 }, { 0x2138, 0x0000, 0x0000, 0xb017 }, + { 0x213c, 0x0000, 0x0010, 0xb017 }, { 0x2140, 0x0000, 0x0000, 0xb017 }, + { 0x2144, 0x0000, 0x0000, 0xb017 }, { 0x2148, 0x0000, 0x0000, 0xb017 }, + { 0x214c, 0x0000, 0x0000, 0xb017 }, { 0x2150, 0x0000, 0x0010, 0xb017 }, + { 0x2154, 0x0000, 0x0000, 0xb017 }, { 0x2158, 0x0000, 0x0000, 0xb017 }, + { 0x215c, 0x0000, 0x0000, 0xb017 }, { 0x2160, 0x0000, 0x0000, 0xb017 }, + { 0x2164, 0x0000, 0x0010, 0xb017 }, { 0x2168, 0x0000, 0x0000, 0xb017 }, + { 0x216c, 0x0000, 0x0000, 0xb017 }, { 0x2170, 0x0000, 0x0000, 0xb017 }, + { 0x2174, 0x0000, 0x0000, 0xb017 }, { 0x2178, 0x0000, 0x0010, 0xb017 }, + { 0x217c, 0x0000, 0x0000, 0xb017 }, { 0x2180, 0x0000, 0x0000, 0xb017 }, + { 0x2184, 0x0000, 0x0000, 0xb017 }, { 0x2188, 0x0000, 0x0000, 0xb017 }, + { 0x218c, 0x0064, 0x5800, 0xb017 }, { 0x2190, 0x00c8, 0xb000, 0xb017 }, + { 0x2194, 0x0064, 0x5800, 0xb017 }, { 0x2198, 0x003d, 0x5be7, 0xb017 }, + { 0x219c, 0x0054, 0x060a, 0xb017 }, { 0x21a0, 0x00c8, 0xa310, 0xb017 }, + { 0x21a4, 0x0029, 0x4de5, 0xb017 }, { 0x21a8, 0x0032, 0x420c, 0xb017 }, + { 0x21ac, 0x0029, 0x4de5, 0xb017 }, { 0x21b0, 0x00fa, 0xe50c, 0xb017 }, + { 0x21b4, 0x0000, 0x0010, 0xb017 }, { 0x21b8, 0x0000, 0x0000, 0xb017 }, + { 0x21bc, 0x0000, 0x0000, 0xb017 }, { 0x21c0, 0x0000, 0x0000, 0xb017 }, + { 0x21c4, 0x0000, 0x0000, 0xb017 }, { 0x21c8, 0x0056, 0xc50f, 0xb017 }, + { 0x21cc, 0x007b, 0xd7e1, 0xb017 }, { 0x21d0, 0x0077, 0xa70e, 0xb017 }, + { 0x21d4, 0x00e0, 0xbde1, 0xb017 }, { 0x21d8, 0x0032, 0x530e, 0xb017 }, + { 0x2204, 0x00fb, 0x7e0f, 0xb017 }, { 0x2208, 0x000b, 0x02e1, 0xb017 }, + { 0x220c, 0x00fb, 0x7e0f, 0xb017 }, { 0x2210, 0x00d5, 0x17e1, 0xb017 }, + { 0x2214, 0x00c0, 0x130f, 0xb017 }, { 0x2218, 0x00e5, 0x0a00, 0xb017 }, + { 0x221c, 0x00cb, 0x1500, 0xb017 }, { 0x2220, 0x00e5, 0x0a00, 0xb017 }, + { 0x2224, 0x00d5, 0x17e1, 0xb017 }, { 0x2228, 0x00c0, 0x130f, 0xb017 }, + { 0x222c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2230, 0x0017, 0x48e2, 0xb017 }, + { 0x2234, 0x00f5, 0xdb0e, 0xb017 }, { 0x2238, 0x00ef, 0x5ce2, 0xb017 }, + { 0x223c, 0x00c1, 0xcc0d, 0xb017 }, { 0x2240, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2244, 0x0017, 0x48e2, 0xb017 }, { 0x2248, 0x00f5, 0xdb0e, 0xb017 }, + { 0x224c, 0x00ef, 0x5ce2, 0xb017 }, { 0x2250, 0x00c1, 0xcc0d, 0xb017 }, + { 0x2254, 0x00f5, 0xdb0e, 0xb017 }, { 0x2258, 0x0017, 0x48e2, 0xb017 }, + { 0x225c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2260, 0x00ef, 0x5ce2, 0xb017 }, + { 0x2264, 0x00c1, 0xcc0d, 0xb017 }, { 0x2268, 0x00f5, 0xdb0e, 0xb017 }, + { 0x226c, 0x0017, 0x48e2, 0xb017 }, { 0x2270, 0x00f5, 0xdb0e, 0xb017 }, + { 0x2274, 0x00ef, 0x5ce2, 0xb017 }, { 0x2278, 0x00c1, 0xcc0d, 0xb017 }, + { 0x227c, 0x00f5, 0xdb0e, 0xb017 }, { 0x2280, 0x0017, 0x48e2, 0xb017 }, + { 0x2284, 0x00f5, 0xdb0e, 0xb017 }, { 0x2288, 0x00ef, 0x5ce2, 0xb017 }, + { 0x228c, 0x00c1, 0xcc0d, 0xb017 }, { 0x22cc, 0x00e8, 0x8d00, 0xb017 }, + { 0x22d0, 0x0000, 0x0000, 0xb017 }, { 0x22d4, 0x0018, 0x72ff, 0xb017 }, + { 0x22d8, 0x00ce, 0x25e1, 0xb017 }, { 0x22dc, 0x002f, 0xe40e, 0xb017 }, + { 0x238e, 0x0000, 0x0099, 0xb011 }, { 0x238f, 0x0000, 0x0011, 0xb011 }, + { 0x2390, 0x0000, 0x0056, 0xb011 }, { 0x2391, 0x0000, 0x0004, 0xb011 }, + { 0x2392, 0x0000, 0x00bb, 0xb011 }, { 0x2393, 0x0000, 0x006d, 0xb011 }, + { 0x2394, 0x0000, 0x0010, 0xb011 }, { 0x2395, 0x0000, 0x0064, 0xb011 }, + { 0x2396, 0x0000, 0x00b6, 0xb011 }, { 0x2397, 0x0000, 0x0028, 0xb011 }, + { 0x2398, 0x0000, 0x000b, 0xb011 }, { 0x239a, 0x0000, 0x0099, 0xb011 }, + { 0x239b, 0x0000, 0x000d, 0xb011 }, { 0x23a6, 0x0000, 0x0064, 0xb011 }, + { 0x23a7, 0x0000, 0x0078, 0xb011 }, { 0x23b9, 0x0000, 0x0000, 0xb011 }, + { 0x23e0, 0x0000, 0x0021, 0xb011 }, { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x3c[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0001, 0xb011 }, { 0x201d, 0x0000, 0x0001, 0xb011 }, + { 0x201f, 0x0000, 0x00fe, 0xb011 }, { 0x2021, 0x0000, 0x0000, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x23ba, 0x0000, 0x008d, 0xb011 }, + { 0x2128, 0x0005, 0x460d, 0xb017 }, { 0x212c, 0x00f6, 0x73e5, 0xb017 }, + { 0x2130, 0x0005, 0x460d, 0xb017 }, { 0x2134, 0x00c0, 0xe9e5, 0xb017 }, + { 0x2138, 0x00d5, 0x010b, 0xb017 }, { 0x213c, 0x009d, 0x7809, 0xb017 }, + { 0x2140, 0x00c5, 0x0eed, 0xb017 }, { 0x2144, 0x009d, 0x7809, 0xb017 }, + { 0x2148, 0x00c4, 0x4ef0, 0xb017 }, { 0x214c, 0x003a, 0x3106, 0xb017 }, + { 0x2150, 0x00af, 0x750e, 0xb017 }, { 0x2154, 0x008c, 0x1ff1, 0xb017 }, + { 0x2158, 0x009e, 0x360c, 0xb017 }, { 0x215c, 0x008c, 0x1ff1, 0xb017 }, + { 0x2160, 0x004d, 0xac0a, 0xb017 }, { 0x2164, 0x007d, 0xa00f, 0xb017 }, + { 0x2168, 0x00e1, 0x9ce3, 0xb017 }, { 0x216c, 0x00e8, 0x590e, 0xb017 }, + { 0x2170, 0x00e1, 0x9ce3, 0xb017 }, { 0x2174, 0x0066, 0xfa0d, 0xb017 }, + { 0x2178, 0x0000, 0x0010, 0xb017 }, { 0x217c, 0x0000, 0x0000, 0xb017 }, + { 0x2180, 0x0000, 0x0000, 0xb017 }, { 0x2184, 0x0000, 0x0000, 0xb017 }, + { 0x2188, 0x0000, 0x0000, 0xb017 }, { 0x218c, 0x0000, 0x0010, 0xb017 }, + { 0x2190, 0x0000, 0x0000, 0xb017 }, { 0x2194, 0x0000, 0x0000, 0xb017 }, + { 0x2198, 0x0000, 0x0000, 0xb017 }, { 0x219c, 0x0000, 0x0000, 0xb017 }, + { 0x21a0, 0x0000, 0x0010, 0xb017 }, { 0x21a4, 0x0000, 0x0000, 0xb017 }, + { 0x21a8, 0x0000, 0x0000, 0xb017 }, { 0x21ac, 0x0000, 0x0000, 0xb017 }, + { 0x21b0, 0x0000, 0x0000, 0xb017 }, { 0x21b4, 0x0000, 0x0010, 0xb017 }, + { 0x21b8, 0x0000, 0x0000, 0xb017 }, { 0x21bc, 0x0000, 0x0000, 0xb017 }, + { 0x21c0, 0x0000, 0x0000, 0xb017 }, { 0x21c4, 0x0000, 0x0000, 0xb017 }, + { 0x23b9, 0x0000, 0x0000, 0xb011 }, { 0x23e0, 0x0000, 0x0020, 0xb011 }, + { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_0x3d[] = { + { 0x2000, 0x0000, 0x0001, 0xb011 }, { 0x23ff, 0x0000, 0x0000, 0xb011 }, + { 0x203a, 0x0000, 0x0080, 0xb011 }, { 0x23e1, 0x0000, 0x0000, 0xb011 }, + { 0x2012, 0x0000, 0x006f, 0xb011 }, { 0x2014, 0x0000, 0x0000, 0xb011 }, + { 0x201b, 0x0000, 0x0002, 0xb011 }, { 0x201d, 0x0000, 0x0002, 0xb011 }, + { 0x201f, 0x0000, 0x00fd, 0xb011 }, { 0x2021, 0x0000, 0x0001, 0xb011 }, + { 0x2022, 0x0000, 0x0010, 0xb011 }, { 0x203d, 0x0000, 0x0005, 0xb011 }, + { 0x203f, 0x0000, 0x0003, 0xb011 }, { 0x2050, 0x0000, 0x002c, 0xb011 }, + { 0x2076, 0x0000, 0x000e, 0xb011 }, { 0x207c, 0x0000, 0x004a, 0xb011 }, + { 0x2081, 0x0000, 0x0003, 0xb011 }, { 0x23ba, 0x0000, 0x008d, 0xb011 }, + { 0x2128, 0x0005, 0x460d, 0xb017 }, { 0x212c, 0x00f6, 0x73e5, 0xb017 }, + { 0x2130, 0x0005, 0x460d, 0xb017 }, { 0x2134, 0x00c0, 0xe9e5, 0xb017 }, + { 0x2138, 0x00d5, 0x010b, 0xb017 }, { 0x213c, 0x009d, 0x7809, 0xb017 }, + { 0x2140, 0x00c5, 0x0eed, 0xb017 }, { 0x2144, 0x009d, 0x7809, 0xb017 }, + { 0x2148, 0x00c4, 0x4ef0, 0xb017 }, { 0x214c, 0x003a, 0x3106, 0xb017 }, + { 0x2150, 0x00af, 0x750e, 0xb017 }, { 0x2154, 0x008c, 0x1ff1, 0xb017 }, + { 0x2158, 0x009e, 0x360c, 0xb017 }, { 0x215c, 0x008c, 0x1ff1, 0xb017 }, + { 0x2160, 0x004d, 0xac0a, 0xb017 }, { 0x2164, 0x007d, 0xa00f, 0xb017 }, + { 0x2168, 0x00e1, 0x9ce3, 0xb017 }, { 0x216c, 0x00e8, 0x590e, 0xb017 }, + { 0x2170, 0x00e1, 0x9ce3, 0xb017 }, { 0x2174, 0x0066, 0xfa0d, 0xb017 }, + { 0x2178, 0x0000, 0x0010, 0xb017 }, { 0x217c, 0x0000, 0x0000, 0xb017 }, + { 0x2180, 0x0000, 0x0000, 0xb017 }, { 0x2184, 0x0000, 0x0000, 0xb017 }, + { 0x2188, 0x0000, 0x0000, 0xb017 }, { 0x218c, 0x0000, 0x0010, 0xb017 }, + { 0x2190, 0x0000, 0x0000, 0xb017 }, { 0x2194, 0x0000, 0x0000, 0xb017 }, + { 0x2198, 0x0000, 0x0000, 0xb017 }, { 0x219c, 0x0000, 0x0000, 0xb017 }, + { 0x21a0, 0x0000, 0x0010, 0xb017 }, { 0x21a4, 0x0000, 0x0000, 0xb017 }, + { 0x21a8, 0x0000, 0x0000, 0xb017 }, { 0x21ac, 0x0000, 0x0000, 0xb017 }, + { 0x21b0, 0x0000, 0x0000, 0xb017 }, { 0x21b4, 0x0000, 0x0010, 0xb017 }, + { 0x21b8, 0x0000, 0x0000, 0xb017 }, { 0x21bc, 0x0000, 0x0000, 0xb017 }, + { 0x21c0, 0x0000, 0x0000, 0xb017 }, { 0x21c4, 0x0000, 0x0000, 0xb017 }, + { 0x23b9, 0x0000, 0x0000, 0xb011 }, { 0x23e0, 0x0000, 0x0020, 0xb011 }, + { 0x23e1, 0x0000, 0x0001, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_seq_desc amp_seq1[] = { + { 0x23ff, 0x0000, 0x0000, 0xb011 }, { 0x203a, 0x0000, 0x0080, 0xb011 }, + }; + + static const struct alc298_samsung_coeff_fixup_desc fixups2[] = { + { 0x4f, 0xb029 }, { 0x05, 0x2be0 }, { 0x30, 0x2421 }, + }; + + + static const struct alc298_samsung_coeff_seq_desc amp_seq2[] = { + { 0x203a, 0x0000, 0x0081, 0xb011 }, { 0x23ff, 0x0000, 0x0001, 0xb011 }, + }; + + if (action != HDA_FIXUP_ACT_INIT) + return; + + // First set of fixups + for (i = 0; i < ARRAY_SIZE(fixups1); i++) + alc_write_coef_idx(codec, fixups1[i].coeff_idx, fixups1[i].coeff_value); + + // First set of writes + alc298_samsung_write_coef_pack_seq(codec, 0x38, amp_0x38, ARRAY_SIZE(amp_0x38)); + alc298_samsung_write_coef_pack_seq(codec, 0x39, amp_0x39, ARRAY_SIZE(amp_0x39)); + alc298_samsung_write_coef_pack_seq(codec, 0x3c, amp_0x3c, ARRAY_SIZE(amp_0x3c)); + alc298_samsung_write_coef_pack_seq(codec, 0x3d, amp_0x3d, ARRAY_SIZE(amp_0x3d)); + + // Second set of writes + alc298_samsung_write_coef_pack_seq(codec, 0x38, amp_seq1, ARRAY_SIZE(amp_seq1)); + alc298_samsung_write_coef_pack_seq(codec, 0x39, amp_seq1, ARRAY_SIZE(amp_seq1)); + alc298_samsung_write_coef_pack_seq(codec, 0x3c, amp_seq1, ARRAY_SIZE(amp_seq1)); + alc298_samsung_write_coef_pack_seq(codec, 0x3d, amp_seq1, ARRAY_SIZE(amp_seq1)); + + // Second set of fixups + for (i = 0; i < ARRAY_SIZE(fixups2); i++) + alc_write_coef_idx(codec, fixups2[i].coeff_idx, fixups2[i].coeff_value); + + // Third set of writes + alc298_samsung_write_coef_pack_seq(codec, 0x38, amp_seq2, ARRAY_SIZE(amp_seq2)); + alc298_samsung_write_coef_pack_seq(codec, 0x39, amp_seq2, ARRAY_SIZE(amp_seq2)); + alc298_samsung_write_coef_pack_seq(codec, 0x3c, amp_seq2, ARRAY_SIZE(amp_seq2)); + alc298_samsung_write_coef_pack_seq(codec, 0x3d, amp_seq2, ARRAY_SIZE(amp_seq2)); + + // Final fixup + alc_write_coef_idx(codec, 0x10, 0x0F21); +} diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 49bd7097d8928aa11ffa53ee22748bd1a304d893..89d8235537cd3b01f96a210be2fcc84b07e891ae 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -2,10 +2,12 @@ // // TAS2781 HDA I2C driver // -// Copyright 2023 Texas Instruments, Inc. +// Copyright 2023 - 2024 Texas Instruments, Inc. // // Author: Shenghao Ding +// Current maintainer: Baojun Xu +#include #include #include #include @@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) static const unsigned char rgno_array[CALIB_MAX] = { 0x74, 0x0c, 0x14, 0x70, 0x7c, }; - unsigned char *data; + int offset = 0; int i, j, rc; + __be32 data; for (i = 0; i < tas_priv->ndev; i++) { - data = tas_priv->cali_data.data + - i * TASDEVICE_SPEAKER_CALIBRATION_SIZE; for (j = 0; j < CALIB_MAX; j++) { + data = cpu_to_be32( + *(uint32_t *)&tas_priv->cali_data.data[offset]); rc = tasdevice_dev_bulk_write(tas_priv, i, TASDEVICE_REG(0, page_array[j], rgno_array[j]), - &(data[4 * j]), 4); + (unsigned char *)&data, 4); if (rc < 0) dev_err(tas_priv->dev, "chn %d calib %d bulk_wr err = %d\n", i, j, rc); + offset += 4; } } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f54466ed8e3eef757b6d917a33607d95435675ef..0523c16305db1296b94a066bbea0cf9180e11a05 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -220,6 +220,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21J6"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M5"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -402,6 +416,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A43"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A44"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -416,6 +437,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A3E"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8B27"), + } + }, { .driver_data = &acp6x_card, .matches = { diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 7434aeeda292ec218faebdaa04011520842cb0ef..402b9a2ff02406a91440414e6a983a3e1c36972f 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -2786,15 +2786,13 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) { struct arizona_pdata *pdata = &arizona->pdata; struct device_node *np = arizona->dev->of_node; - struct property *prop; - const __be32 *cur; u32 val; u32 pdm_val[ARIZONA_MAX_PDM_SPK]; int ret; int count = 0; count = 0; - of_property_for_each_u32(np, "wlf,inmode", prop, cur, val) { + of_property_for_each_u32(np, "wlf,inmode", val) { if (count == ARRAY_SIZE(pdata->inmode)) break; @@ -2803,7 +2801,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,dmic-ref", prop, cur, val) { + of_property_for_each_u32(np, "wlf,dmic-ref", val) { if (count == ARRAY_SIZE(pdata->dmic_ref)) break; @@ -2812,7 +2810,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,out-mono", prop, cur, val) { + of_property_for_each_u32(np, "wlf,out-mono", val) { if (count == ARRAY_SIZE(pdata->out_mono)) break; @@ -2821,7 +2819,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,max-channels-clocked", prop, cur, val) { + of_property_for_each_u32(np, "wlf,max-channels-clocked", val) { if (count == ARRAY_SIZE(pdata->max_channels_clocked)) break; @@ -2830,7 +2828,7 @@ int arizona_of_get_audio_pdata(struct arizona *arizona) } count = 0; - of_property_for_each_u32(np, "wlf,out-volume-limit", prop, cur, val) { + of_property_for_each_u32(np, "wlf,out-volume-limit", val) { if (count == ARRAY_SIZE(pdata->out_vol_limit)) break; diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 287ac01a387357beffe2a8f76ab5f3ec15dc7b3d..605964af8afad4ac2763306094c2002c2f720b45 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -108,7 +108,7 @@ static efi_status_t cs_amp_get_efi_variable(efi_char16_t *name, KUNIT_STATIC_STUB_REDIRECT(cs_amp_get_efi_variable, name, guid, size, buf); - if (IS_ENABLED(CONFIG_EFI)) + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return efi.get_variable(name, guid, &attr, size, buf); return EFI_NOT_FOUND; diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 2392c6effed857c32326ee032f776dcc577ed0fb..1e9d73bee3b4e2042e2a98a5d06d96c517699e52 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -176,17 +176,10 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component, struct snd_kcontrol *kcontrol; struct snd_kcontrol_volatile *vd; unsigned int index_offset; - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - if (component->name_prefix) - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s", - component->name_prefix, ctl_name); - else - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name); - - kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name); + kcontrol = snd_soc_component_get_kcontrol_locked(component, ctl_name); if (!kcontrol) { - dev_err(component->dev, "Can't find kcontrol %s\n", name); + dev_err(component->dev, "Can't find kcontrol %s\n", ctl_name); return -EINVAL; } diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index fc03bb7ecae136b99fccc2efb332f05c4c1124fd..7c9a17fe2195ccc6011f4bc5ee09f594860b456e 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -23,6 +23,79 @@ /* Register addresses are offset when sent over SoundWire */ #define CS35L56_SDW_ADDR_OFFSET 0x8000 +/* Cirrus bus bridge registers */ +#define CS35L56_SDW_MEM_ACCESS_STATUS 0xd0 +#define CS35L56_SDW_MEM_READ_DATA 0xd8 + +#define CS35L56_SDW_LAST_LATE BIT(3) +#define CS35L56_SDW_CMD_IN_PROGRESS BIT(2) +#define CS35L56_SDW_RDATA_RDY BIT(0) + +#define CS35L56_LATE_READ_POLL_US 10 +#define CS35L56_LATE_READ_TIMEOUT_US 1000 + +static int cs35l56_sdw_poll_mem_status(struct sdw_slave *peripheral, + unsigned int mask, + unsigned int match) +{ + int ret, val; + + ret = read_poll_timeout(sdw_read_no_pm, val, + (val < 0) || ((val & mask) == match), + CS35L56_LATE_READ_POLL_US, CS35L56_LATE_READ_TIMEOUT_US, + false, peripheral, CS35L56_SDW_MEM_ACCESS_STATUS); + if (ret < 0) + return ret; + + if (val < 0) + return val; + + return 0; +} + +static int cs35l56_sdw_slow_read(struct sdw_slave *peripheral, unsigned int reg, + u8 *buf, size_t val_size) +{ + int ret, i; + + reg += CS35L56_SDW_ADDR_OFFSET; + + for (i = 0; i < val_size; i += sizeof(u32)) { + /* Poll for bus bridge idle */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_CMD_IN_PROGRESS, + 0); + if (ret < 0) { + dev_err(&peripheral->dev, "!CMD_IN_PROGRESS fail: %d\n", ret); + return ret; + } + + /* Reading LSByte triggers read of register to holding buffer */ + sdw_read_no_pm(peripheral, reg + i); + + /* Wait for data available */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_RDATA_RDY, + CS35L56_SDW_RDATA_RDY); + if (ret < 0) { + dev_err(&peripheral->dev, "RDATA_RDY fail: %d\n", ret); + return ret; + } + + /* Read data from buffer */ + ret = sdw_nread_no_pm(peripheral, CS35L56_SDW_MEM_READ_DATA, + sizeof(u32), &buf[i]); + if (ret) { + dev_err(&peripheral->dev, "Late read @%#x failed: %d\n", reg + i, ret); + return ret; + } + + swab32s((u32 *)&buf[i]); + } + + return 0; +} + static int cs35l56_sdw_read_one(struct sdw_slave *peripheral, unsigned int reg, void *buf) { int ret; @@ -48,6 +121,10 @@ static int cs35l56_sdw_read(void *context, const void *reg_buf, int ret; reg = le32_to_cpu(*(const __le32 *)reg_buf); + + if (cs35l56_is_otp_register(reg)) + return cs35l56_sdw_slow_read(peripheral, reg, buf8, val_size); + reg += CS35L56_SDW_ADDR_OFFSET; if (val_size == 4) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e7e8d617da94e4931062f00db7bd386f14694ddf..bd74fef33d498341f8ed7604887927e6bb04e131 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -36,6 +36,7 @@ static const struct reg_sequence cs35l56_patch[] = { { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, + { CS35L56_IRQ1_MASK_18, 0x1f7df0ff }, /* These are not reset by a soft-reset, so patch to defaults. */ { CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 }, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 84c34f5b1a516d092e1ded727d3eb77c9f7f5b1e..757ade6373ede393496e9cbc0ee76b2c3d9d3b1f 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1095,6 +1095,11 @@ int cs35l56_system_resume(struct device *dev) } EXPORT_SYMBOL_GPL(cs35l56_system_resume); +static int cs35l56_control_add_nop(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl) +{ + return 0; +} + static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) { struct wm_adsp *dsp; @@ -1117,6 +1122,12 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) dsp->fw = 12; dsp->wmfw_optional = true; + /* + * None of the firmware controls need to be exported so add a no-op + * callback that suppresses creating an ALSA control. + */ + dsp->control_add = &cs35l56_control_add_nop; + dev_dbg(cs35l56->base.dev, "DSP system name: '%s'\n", dsp->system_name); ret = wm_halo_init(dsp); diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 92674314227c4623af36a6b1642e83c8a33572d6..5183b4586424326e3ee900ba875aa3c6515d4f85 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -252,24 +253,20 @@ CS42L43_IRQ_COMPLETE(load_detect) static irqreturn_t cs42l43_mic_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - static const char * const controls[] = { - "Decimator 1 Switch", - "Decimator 2 Switch", - "Decimator 3 Switch", - "Decimator 4 Switch", - }; - int i, ret; + struct snd_soc_component *component = priv->component; + int i; dev_dbg(priv->dev, "Microphone shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - for (i = 0; i < ARRAY_SIZE(controls); i++) { - ret = snd_soc_component_notify_control(priv->component, - controls[i]); - if (ret) + for (i = 1; i < ARRAY_SIZE(priv->kctl); i++) { + if (!priv->kctl[i]) return IRQ_NONE; + + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[i]->id); } return IRQ_HANDLED; @@ -278,18 +275,19 @@ static irqreturn_t cs42l43_mic_shutter(int irq, void *data) static irqreturn_t cs42l43_spk_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - int ret; + struct snd_soc_component *component = priv->component; dev_dbg(priv->dev, "Speaker shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - ret = snd_soc_component_notify_control(priv->component, - "Speaker Digital Switch"); - if (ret) + if (!priv->kctl[0]) return IRQ_NONE; + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[0]->id); + return IRQ_HANDLED; } @@ -590,7 +588,46 @@ static int cs42l43_asp_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mas return 0; } +static int cs42l43_dai_probe(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + static const char * const controls[] = { + "Speaker Digital Switch", + "Decimator 1 Switch", + "Decimator 2 Switch", + "Decimator 3 Switch", + "Decimator 4 Switch", + }; + int i; + + static_assert(ARRAY_SIZE(controls) == ARRAY_SIZE(priv->kctl)); + + for (i = 0; i < ARRAY_SIZE(controls); i++) { + if (priv->kctl[i]) + continue; + + priv->kctl[i] = snd_soc_component_get_kcontrol(component, controls[i]); + } + + return 0; +} + +static int cs42l43_dai_remove(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + int i; + + for (i = 0; i < ARRAY_SIZE(priv->kctl); i++) + priv->kctl[i] = NULL; + + return 0; +} + static const struct snd_soc_dai_ops cs42l43_asp_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .hw_params = cs42l43_asp_hw_params, .set_fmt = cs42l43_asp_set_fmt, @@ -608,9 +645,11 @@ static int cs42l43_sdw_hw_params(struct snd_pcm_substream *substream, return ret; return cs42l43_set_sample_rate(substream, params, dai); -}; +} static const struct snd_soc_dai_ops cs42l43_sdw_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .set_stream = cs42l43_sdw_set_stream, .hw_params = cs42l43_sdw_hw_params, diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 9924c13e1eb539a24efdcb2d5ed27c3f11cc9ed2..9c144e129535fa7f4b6ca3427df4418623f09481 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -100,6 +100,8 @@ struct cs42l43_codec { struct delayed_work hp_ilimit_clear_work; bool hp_ilimited; int hp_ilimit_count; + + struct snd_kcontrol *kctl[5]; }; #if IS_REACHABLE(CONFIG_SND_SOC_CS42L43_SDW) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index 25a86a32e936679dcfd13d9428e47618ac60706f..da52afe56c3c625aed26212dac6094e099a3d8e1 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -129,16 +129,16 @@ volsw_err: static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1270, 50, 0); -static const char * const cs530x_in_hpf_text[] = { +static const char * const cs530x_in_filter_text[] = { "Min Phase Slow Roll-off", "Min Phase Fast Roll-off", "Linear Phase Slow Roll-off", "Linear Phase Fast Roll-off", }; -static SOC_ENUM_SINGLE_DECL(cs530x_in_hpf_enum, CS530X_IN_FILTER, +static SOC_ENUM_SINGLE_DECL(cs530x_in_filter_enum, CS530X_IN_FILTER, CS530X_IN_FILTER_SHIFT, - cs530x_in_hpf_text); + cs530x_in_filter_text); static const char * const cs530x_in_4ch_sum_text[] = { "None", @@ -189,7 +189,7 @@ SOC_SINGLE_EXT_TLV("IN1 Volume", CS530X_IN_VOL_CTRL1_0, 0, 255, 1, SOC_SINGLE_EXT_TLV("IN2 Volume", CS530X_IN_VOL_CTRL1_1, 0, 255, 1, snd_soc_get_volsw, cs530x_put_volsw_vu, in_vol_tlv), -SOC_ENUM("IN HPF Select", cs530x_in_hpf_enum), +SOC_ENUM("IN DEC Filter Select", cs530x_in_filter_enum), SOC_ENUM("Input Ramp Up", cs530x_ramp_inc_enum), SOC_ENUM("Input Ramp Down", cs530x_ramp_dec_enum), diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b246694ebb4faa4d7aab7de3c9145eeed9e124b9..be3c79232a3180583ef64843860ad844bd2cc2cd 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -843,6 +843,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) es8326_disable_micbias(es8326->component); if (es8326->jack->status & SND_JACK_HEADPHONE) { dev_dbg(comp->dev, "Report hp remove event\n"); + snd_soc_jack_report(es8326->jack, 0, + SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2); snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET); /* mute adc when mic path switch */ regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44); diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index b852cc7ffad9d9417897acbf79fa64e6a9b58dbe..a62ccd09bacd72d24012a6f88f0aafcbcb4b77e0 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1472,6 +1472,8 @@ static void va_macro_set_lpass_codec_version(struct va_macro *va) if ((core_id_0 == 0x01) && (core_id_1 == 0x0F)) version = LPASS_CODEC_VERSION_2_0; + if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && core_id_2 == 0x01) + version = LPASS_CODEC_VERSION_2_0; if ((core_id_0 == 0x02) && (core_id_1 == 0x0E)) version = LPASS_CODEC_VERSION_2_1; if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && (core_id_2 == 0x50 || core_id_2 == 0x51)) diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index e1cbaf8a944d8cf94992fbe249743a9c2d3d6259..fd4a96a12060e11f55dad2eb737dceb870dfa2ef 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -736,7 +736,7 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, return ret; } - dev_info(component->dev, + dev_dbg(component->dev, "pll_int=%x pll_frac=%x mclk_scaler=%x pre_factor=%x\n", pll_param->pll_int, pll_param->pll_frac, pll_param->mclk_scaler, pll_param->pre_factor); diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 63626b982d04239216d202978be5467d7b3d0cc7..8f9a3ae7153e948146b96e92f1205fc0dd7ea56b 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -2162,7 +2162,7 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) return; cal = cal_fmw->calibrations; - if (cal) + if (!cal) return; load_calib_data(priv, &cal->dev_data); diff --git a/sound/soc/codecs/wcd937x-sdw.c b/sound/soc/codecs/wcd937x-sdw.c index 3abc8041406adc24544bb33e23b56162c0c36deb..0c33f7f3dc25c6088ba8922a8e48769955f53400 100644 --- a/sound/soc/codecs/wcd937x-sdw.c +++ b/sound/soc/codecs/wcd937x-sdw.c @@ -1049,7 +1049,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; @@ -1062,7 +1062,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, /* Start in cache-only until device is enumerated */ regcache_cache_only(wcd->regmap, true); } else { - pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index c995bcc59eadb4f2c726267b48044bb3dc6e8c93..7da8a10bd0a93281822a6d315ddff06cc9c7894c 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1252,12 +1252,12 @@ static int wcd9380_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wcd939x-sdw.c b/sound/soc/codecs/wcd939x-sdw.c index 94b1e99a3ca0ecdb16a8dd73b2b07ee54564dd50..fca95777a75af930828e8b31575d6198c47d4639 100644 --- a/sound/soc/codecs/wcd939x-sdw.c +++ b/sound/soc/codecs/wcd939x-sdw.c @@ -1453,12 +1453,12 @@ static int wcd9390_probe(struct sdw_slave *pdev, const struct sdw_device_id *id) pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd939x_tx_dpn_prop; wcd->ch_info = &wcd939x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd939x_rx_dpn_prop; wcd->ch_info = &wcd939x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 9f8549b34e30254dc9831300b1495e58f2158d5c..e69283195f362ffd928eb006a987ad57dae0b49a 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -583,7 +583,7 @@ static void wm_adsp_ctl_work(struct work_struct *work) kfree(kcontrol); } -static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) { struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); struct cs_dsp *cs_dsp = &dsp->cs_dsp; @@ -658,6 +658,17 @@ err_ctl: return ret; } +EXPORT_SYMBOL_GPL(wm_adsp_control_add); + +static int wm_adsp_control_add_cb(struct cs_dsp_coeff_ctl *cs_ctl) +{ + struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); + + if (dsp->control_add) + return (dsp->control_add)(dsp, cs_ctl); + else + return wm_adsp_control_add(cs_ctl); +} static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) { @@ -2072,12 +2083,12 @@ irqreturn_t wm_halo_wdt_expire(int irq, void *data) EXPORT_SYMBOL_GPL(wm_halo_wdt_expire); static const struct cs_dsp_client_ops wm_adsp1_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, }; static const struct cs_dsp_client_ops wm_adsp2_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, .pre_run = wm_adsp_pre_run, .post_run = wm_adsp_event_post_run, diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index e53dfcf1f78ff77e5bad89e9e1aef3bb34db242d..edc5b02ae76542378a6ff6cf794209600ef92fbc 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -37,6 +37,7 @@ struct wm_adsp { bool wmfw_optional; struct work_struct boot_work; + int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl); int (*pre_run)(struct wm_adsp *dsp); bool preloaded; @@ -132,6 +133,8 @@ int wm_adsp_compr_pointer(struct snd_soc_component *component, int wm_adsp_compr_copy(struct snd_soc_component *component, struct snd_compr_stream *stream, char __user *buf, size_t count); + +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl); int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 0478599d0f359f2fbd64bc353e7b179b90adafd2..fb9e92f08d987761181cf5d3bfdbc23534f86483 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1152,7 +1152,7 @@ static int wsa881x_probe(struct sdw_slave *pdev, wsa881x->sconfig.frame_rate = 48000; wsa881x->sconfig.direction = SDW_DATA_DIR_RX; wsa881x->sconfig.type = SDW_STREAM_PDM; - pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; pdev->prop.clk_stop_mode1 = true; diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index d0ab4e2290b6a9f049a95534cbe7673b9c1ae594..3e4fdaa3f44fb10a1d1200b02d54b4d3a3405913 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1406,7 +1406,7 @@ static int wsa883x_probe(struct sdw_slave *pdev, WSA883X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index d17ae17b2938b40ee4bb50bbacaad834e6ff4751..89eb5e03a61704dda9693d31d5e4c02b3eb98855 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -1895,7 +1895,7 @@ static int wsa884x_probe(struct sdw_slave *pdev, WSA884X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa884x_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 82df887b3af5ed4236904cffc880f54982673bf4..f6c3aeff0d8eafd71bca747d77386c5ef19b7168 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -306,27 +306,12 @@ static int be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, return 0; } -SND_SOC_DAILINK_DEFS(hifi, - DAILINK_COMP_ARRAY(COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_EMPTY(), COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(hifi_fe, - DAILINK_COMP_ARRAY(COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_DUMMY()), - DAILINK_COMP_ARRAY(COMP_EMPTY())); - -SND_SOC_DAILINK_DEFS(hifi_be, - DAILINK_COMP_ARRAY(COMP_EMPTY()), - DAILINK_COMP_ARRAY(COMP_EMPTY(), COMP_EMPTY())); - static const struct snd_soc_dai_link fsl_asoc_card_dai[] = { /* Default ASoC DAI Link*/ { .name = "HiFi", .stream_name = "HiFi", .ops = &fsl_asoc_card_ops, - SND_SOC_DAILINK_REG(hifi), }, /* DPCM Link between Front-End and Back-End (Optional) */ { @@ -335,7 +320,6 @@ static const struct snd_soc_dai_link fsl_asoc_card_dai[] = { .dpcm_playback = 1, .dpcm_capture = 1, .dynamic = 1, - SND_SOC_DAILINK_REG(hifi_fe), }, { .name = "HiFi-ASRC-BE", @@ -345,7 +329,6 @@ static const struct snd_soc_dai_link fsl_asoc_card_dai[] = { .dpcm_playback = 1, .dpcm_capture = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(hifi_be), }, }; @@ -637,6 +620,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) struct platform_device *cpu_pdev; struct fsl_asoc_card_priv *priv; struct device *codec_dev[2] = { NULL, NULL }; + struct snd_soc_dai_link_component *dlc; const char *codec_dai_name[2]; const char *codec_dev_name[2]; u32 asrc_fmt = 0; @@ -717,7 +701,35 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) memcpy(priv->dai_link, fsl_asoc_card_dai, sizeof(struct snd_soc_dai_link) * ARRAY_SIZE(priv->dai_link)); + /* + * "Default ASoC DAI Link": 1 cpus, 2 codecs, 1 platforms + * "DPCM Link Front-End": 1 cpus, 1 codecs (dummy), 1 platforms + * "DPCM Link Back-End": 1 cpus, 2 codecs + * totally 10 components + */ + dlc = devm_kcalloc(&pdev->dev, 10, sizeof(*dlc), GFP_KERNEL); + if (!dlc) { + ret = -ENOMEM; + goto asrc_fail; + } + + priv->dai_link[0].cpus = &dlc[0]; + priv->dai_link[0].num_cpus = 1; + priv->dai_link[0].codecs = &dlc[1]; priv->dai_link[0].num_codecs = 1; + priv->dai_link[0].platforms = &dlc[3]; + priv->dai_link[0].num_platforms = 1; + + priv->dai_link[1].cpus = &dlc[4]; + priv->dai_link[1].num_cpus = 1; + priv->dai_link[1].codecs = &dlc[5]; + priv->dai_link[1].num_codecs = 0; /* dummy */ + priv->dai_link[1].platforms = &dlc[6]; + priv->dai_link[1].num_platforms = 1; + + priv->dai_link[2].cpus = &dlc[7]; + priv->dai_link[2].num_cpus = 1; + priv->dai_link[2].codecs = &dlc[8]; priv->dai_link[2].num_codecs = 1; priv->card.dapm_routes = audio_map; diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 0d37edb70261cb64a47dc10659155b0e793d1c06..22b240a70ad48e65dedaf59522eb528d16a54ee2 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -831,7 +831,7 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { {REG_MICFIL_CTRL1, 0x00000000}, {REG_MICFIL_CTRL2, 0x00000000}, {REG_MICFIL_STAT, 0x00000000}, - {REG_MICFIL_FIFO_CTRL, 0x00000007}, + {REG_MICFIL_FIFO_CTRL, 0x0000001F}, {REG_MICFIL_FIFO_STAT, 0x00000000}, {REG_MICFIL_DATACH0, 0x00000000}, {REG_MICFIL_DATACH1, 0x00000000}, @@ -855,6 +855,8 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -872,9 +874,6 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: - case REG_MICFIL_FSYNC_CTRL: - case REG_MICFIL_VERID: - case REG_MICFIL_PARAM: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: @@ -883,6 +882,12 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NDATA: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + case REG_MICFIL_VERID: + case REG_MICFIL_PARAM: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } @@ -890,6 +895,8 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -899,7 +906,6 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: /* Write 1 to Clear */ - case REG_MICFIL_FSYNC_CTRL: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: /* Write 1 to Clear */ @@ -907,6 +913,10 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NCONFIG: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } diff --git a/sound/soc/fsl/fsl_micfil.h b/sound/soc/fsl/fsl_micfil.h index c6b902ba0a5317d4ead1ce6ec320d8901f0fb69b..b7798a7cbf2af87b14537a19a2657960e9a421dd 100644 --- a/sound/soc/fsl/fsl_micfil.h +++ b/sound/soc/fsl/fsl_micfil.h @@ -72,7 +72,7 @@ #define MICFIL_STAT_CHXF(ch) BIT(ch) /* MICFIL FIFO Control Register -- REG_MICFIL_FIFO_CTRL 0x10 */ -#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(2, 0) +#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(4, 0) /* MICFIL FIFO Status Register -- REG_MICFIL_FIFO_STAT 0x14 */ #define MICFIL_FIFO_STAT_FIFOX_OVER(ch) BIT(ch) diff --git a/sound/soc/intel/common/soc-acpi-intel-ssp-common.c b/sound/soc/intel/common/soc-acpi-intel-ssp-common.c index 75d0b931d895df8945f2b253f85c9c946a878b81..de7a3f7f47f104029723e941131663ae6fdc49a8 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ssp-common.c +++ b/sound/soc/intel/common/soc-acpi-intel-ssp-common.c @@ -64,6 +64,15 @@ static const struct codec_map amps[] = { CODEC_MAP_ENTRY("RT1015P", "rt1015", RT1015P_ACPI_HID, CODEC_RT1015P), CODEC_MAP_ENTRY("RT1019P", "rt1019", RT1019P_ACPI_HID, CODEC_RT1019P), CODEC_MAP_ENTRY("RT1308", "rt1308", RT1308_ACPI_HID, CODEC_RT1308), + + /* + * Monolithic components + * + * Only put components that can serve as both the amp and the codec below this line. + * This will ensure that if the part is used just as a codec and there is an amp as well + * then the amp will be selected properly. + */ + CODEC_MAP_ENTRY("RT5650", "rt5650", RT5650_ACPI_HID, CODEC_RT5650), }; enum snd_soc_acpi_intel_codec diff --git a/sound/soc/intel/common/soc-intel-quirks.h b/sound/soc/intel/common/soc-intel-quirks.h index de4e550c5b34dc402999e65596981639382fcadb..42bd51456b945d0b326bab574ba4b6550851ac96 100644 --- a/sound/soc/intel/common/soc-intel-quirks.h +++ b/sound/soc/intel/common/soc-intel-quirks.h @@ -11,7 +11,7 @@ #include -#if IS_ENABLED(CONFIG_X86) +#if IS_REACHABLE(CONFIG_IOSF_MBI) #include #include diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index 7e6090af720b9a2b569e20f5a25e702f0338e7d8..75909196b7698ab31e92a469dab60ea1b21502e7 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -207,25 +207,18 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) status = FIELD_GET(STATUS1_INT_STS, status); axg_fifo_ack_irq(fifo, status); - /* Use the thread to call period elapsed on nonatomic links */ - if (status & FIFO_INT_COUNT_REPEAT) - return IRQ_WAKE_THREAD; + if (status & ~FIFO_INT_COUNT_REPEAT) + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + if (status & FIFO_INT_COUNT_REPEAT) { + snd_pcm_period_elapsed(ss); + return IRQ_HANDLED; + } return IRQ_NONE; } -static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) -{ - struct snd_pcm_substream *ss = dev_id; - - snd_pcm_period_elapsed(ss); - - return IRQ_HANDLED; -} - int axg_fifo_pcm_open(struct snd_soc_component *component, struct snd_pcm_substream *ss) { @@ -251,8 +244,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, - axg_fifo_pcm_irq_block_thread, + /* Use the threaded irq handler only with non-atomic links */ + ret = request_threaded_irq(fifo->irq, NULL, + axg_fifo_pcm_irq_block, IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 4d7c2e3c929a969c134aad57bd938c7df004dae1..42f48132191979e5b167ee40e489f4a52db678f6 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -236,19 +236,45 @@ int snd_soc_component_force_enable_pin_unlocked( } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); -int snd_soc_component_notify_control(struct snd_soc_component *component, - const char * const ctl) +static void soc_get_kcontrol_name(struct snd_soc_component *component, + char *buf, int size, const char * const ctl) { - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - struct snd_kcontrol *kctl; - /* When updating, change also snd_soc_dapm_widget_name_cmp() */ if (component->name_prefix) - snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); + snprintf(buf, size, "%s %s", component->name_prefix, ctl); else - snprintf(name, ARRAY_SIZE(name), "%s", ctl); + snprintf(buf, size, "%s", ctl); +} + +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol); + +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol_locked(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol_locked); + +int snd_soc_component_notify_control(struct snd_soc_component *component, + const char * const ctl) +{ + struct snd_kcontrol *kctl; - kctl = snd_soc_card_get_kcontrol(component->card, name); + kctl = snd_soc_component_get_kcontrol(component, ctl); if (!kctl) return soc_component_ret(component, -EINVAL); diff --git a/sound/soc/sof/amd/pci-vangogh.c b/sound/soc/sof/amd/pci-vangogh.c index 16eb2994fbab905b8c1f4994f078be4f013611da..eba5808401003be4446123644e430219dc7f1ecd 100644 --- a/sound/soc/sof/amd/pci-vangogh.c +++ b/sound/soc/sof/amd/pci-vangogh.c @@ -34,7 +34,6 @@ static const struct sof_amd_acp_desc vangogh_chip_info = { .dsp_intr_base = ACP5X_DSP_SW_INTR_BASE, .sram_pte_offset = ACP5X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP5X_AXI2DAGB_SEM_0, - .acp_clkmux_sel = ACP5X_CLKMUX_SEL, .probe_reg_offset = ACP5X_FUTURE_REG_ACLK_0, }; diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index 1c7019c3cbd38ccaed5b5aca638a5d299b5ce824..cdd1e79ef9f6a443181be6350dd92cd10ed0f30f 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -234,7 +234,7 @@ static int imx8m_probe(struct snd_sof_dev *sdev) /* set default mailbox offset for FW ready message */ sdev->dsp_box.offset = MBOX_OFFSET; - priv->regmap = syscon_regmap_lookup_by_compatible("fsl,dsp-ctrl"); + priv->regmap = syscon_regmap_lookup_by_phandle(np, "fsl,dsp-ctrl"); if (IS_ERR(priv->regmap)) { dev_err(sdev->dev, "cannot find dsp-ctrl registers"); ret = PTR_ERR(priv->regmap); diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index b8b914eaf7e0555fdefefb8eeeb7bd2878395659..75f6240cf3e1d3c4ded00423e7d3c3f5775c0092 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -310,15 +310,19 @@ int hda_cl_copy_fw(struct snd_sof_dev *sdev, struct hdac_ext_stream *hext_stream return ret; } - /* Wait for completion of transfer */ - time_left = wait_for_completion_timeout(&hda_stream->ioc, - msecs_to_jiffies(HDA_CL_DMA_IOC_TIMEOUT_MS)); - - if (!time_left) { - dev_err(sdev->dev, "Code loader DMA did not complete\n"); - return -ETIMEDOUT; + if (sdev->pdata->ipc_type == SOF_IPC_TYPE_4) { + /* Wait for completion of transfer */ + time_left = wait_for_completion_timeout(&hda_stream->ioc, + msecs_to_jiffies(HDA_CL_DMA_IOC_TIMEOUT_MS)); + + if (!time_left) { + dev_err(sdev->dev, "Code loader DMA did not complete\n"); + return -ETIMEDOUT; + } + dev_dbg(sdev->dev, "Code loader DMA done\n"); } - dev_dbg(sdev->dev, "Code loader DMA done, waiting for FW_ENTERED status\n"); + + dev_dbg(sdev->dev, "waiting for FW_ENTERED status\n"); status = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR, chip->rom_status_reg, reg, diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index daf364f773ddcdda948bfb9abf27bd6f5868082d..5a40b8fbbbd3b561c49957323b6ad0b1de46ee9d 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1307,9 +1307,10 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) const struct sof_dev_desc *desc = sof_pdata->desc; struct hdac_bus *bus = sof_to_bus(sdev); struct snd_soc_acpi_mach *mach = NULL; - enum snd_soc_acpi_intel_codec codec_type; + enum snd_soc_acpi_intel_codec codec_type, amp_type; const char *tplg_filename; const char *tplg_suffix; + bool amp_name_valid; /* Try I2S or DMIC if it is supported */ if (interface_mask & (BIT(SOF_DAI_INTEL_SSP) | BIT(SOF_DAI_INTEL_DMIC))) @@ -1413,15 +1414,16 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) } } - codec_type = snd_soc_acpi_intel_detect_amp_type(sdev->dev); + amp_type = snd_soc_acpi_intel_detect_amp_type(sdev->dev); + codec_type = snd_soc_acpi_intel_detect_codec_type(sdev->dev); + amp_name_valid = amp_type != CODEC_NONE && amp_type != codec_type; - if (tplg_fixup && - mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_AMP_NAME && - codec_type != CODEC_NONE) { - tplg_suffix = snd_soc_acpi_intel_get_amp_tplg_suffix(codec_type); + if (tplg_fixup && amp_name_valid && + mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_AMP_NAME) { + tplg_suffix = snd_soc_acpi_intel_get_amp_tplg_suffix(amp_type); if (!tplg_suffix) { dev_err(sdev->dev, "no tplg suffix found, amp %d\n", - codec_type); + amp_type); return NULL; } @@ -1436,7 +1438,6 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) add_extension = true; } - codec_type = snd_soc_acpi_intel_detect_codec_type(sdev->dev); if (tplg_fixup && mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_CODEC_NAME && diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index 90f6856ee80cd2a2c16b52d0472689b944560a72..87be7f16e8c2b65e421739dc48b2f19db9d00548 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -1358,7 +1358,13 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget) ipc4_copier = dai->private; if (pipeline->use_chain_dma) { - pipeline->msg.primary = 0; + /* + * Preserve the DMA Link ID and clear other bits since + * the DMA Link ID is only configured once during + * dai_config, other fields are expected to be 0 for + * re-configuration + */ + pipeline->msg.primary &= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK; pipeline->msg.extension = 0; } @@ -3095,8 +3101,14 @@ static int sof_ipc4_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget * return 0; if (pipeline->use_chain_dma) { - pipeline->msg.primary &= ~SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK; - pipeline->msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(data->dai_data); + /* + * Only configure the DMA Link ID for ChainDMA when this op is + * invoked with SOF_DAI_CONFIG_FLAGS_HW_PARAMS + */ + if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) { + pipeline->msg.primary &= ~SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK; + pipeline->msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(data->dai_data); + } return 0; } diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 24ae1d4959be59a9d683310248f420eab5b55ef2..1c6e035fd313f7c08bb31b49bac967e04f387fe1 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -573,7 +573,7 @@ static const struct snd_sof_dsp_ops sof_mt8195_ops = { static struct snd_sof_of_mach sof_mt8195_machs[] = { { .compatible = "google,tomato", - .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682-dts.tplg" + .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682.tplg" }, { .compatible = "mediatek,mt8195", .sof_tplg_filename = "sof-mt8195.tplg" diff --git a/sound/soc/sti/sti_uniperif.c b/sound/soc/sti/sti_uniperif.c index ba824f14a39cf39a9164cb0806094947a297f4dc..a7956e5a4ee5d1a2d0248ec1a90532fed022c13c 100644 --- a/sound/soc/sti/sti_uniperif.c +++ b/sound/soc/sti/sti_uniperif.c @@ -352,7 +352,7 @@ static int sti_uniperiph_resume(struct snd_soc_component *component) return ret; } -static int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct sti_uniperiph_dai *dai_data = &priv->dai_data; diff --git a/sound/soc/sti/uniperif.h b/sound/soc/sti/uniperif.h index 2a5de328501c192333187365794f49b09044dd93..74e51f0ff85c841dad9583b596587329baf6365a 100644 --- a/sound/soc/sti/uniperif.h +++ b/sound/soc/sti/uniperif.h @@ -1380,6 +1380,7 @@ int uni_reader_init(struct platform_device *pdev, struct uniperif *reader); /* common */ +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai); int sti_uniperiph_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index dd9013c4766491af0980e74020c4dcfb4d4d17a2..6d1ce030963c628935b5762de6ae7d0c7da9f91c 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -1038,6 +1038,7 @@ static const struct snd_soc_dai_ops uni_player_dai_ops = { .startup = uni_player_startup, .shutdown = uni_player_shutdown, .prepare = uni_player_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_player_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 065c5f0d1f5f00c65715ccde98360816ddbf67a9..05ea2b794eb92563d4a53ac66791ae6780796b6a 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -401,6 +401,7 @@ static const struct snd_soc_dai_ops uni_reader_dai_ops = { .startup = uni_reader_startup, .shutdown = uni_reader_shutdown, .prepare = uni_reader_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_reader_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index 74effc57a7a06be099a0d865370b5881c1d9c937..2463c22e9cf6d76af6d24e7343ee0adbb00f5b49 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -78,6 +78,7 @@ config SND_SOC_TEGRA210_DMIC config SND_SOC_TEGRA210_I2S tristate "Tegra210 I2S module" + select SND_SIMPLE_CARD_UTILS help Config to enable the Inter-IC Sound (I2S) Controller which implements full-duplex and bidirectional and single direction diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index f4437015d43a7500b809a303f175b211662d500f..9df49a880b750d639c5d23abef2cbf0a07e1f9c8 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -286,12 +286,14 @@ static void line6_data_received(struct urb *urb) { struct usb_line6 *line6 = (struct usb_line6 *)urb->context; struct midi_buffer *mb = &line6->line6midi->midibuf_in; + unsigned long flags; int done; if (urb->status == -ESHUTDOWN) return; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_write(mb, urb->transfer_buffer, urb->actual_length); @@ -300,12 +302,15 @@ static void line6_data_received(struct urb *urb) dev_dbg(line6->ifcdev, "%d %d buffer overflow - message skipped\n", done, urb->actual_length); } + spin_unlock_irqrestore(&line6->line6midi->lock, flags); for (;;) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN, LINE6_MIDIBUF_READ_RX); + spin_unlock_irqrestore(&line6->line6midi->lock, flags); if (done <= 0) break; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index c00009b545c002d7fead12091e75f3e15c259665..f7ce8e8c3c3eaa0cdee680302afc768436096997 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1211,6 +1211,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->res = 16; } break; + case USB_ID(0x1bcf, 0x2281): /* HD Webcam */ + if (!strcmp(kctl->id.name, "Mic Capture Volume")) { + usb_audio_info(chip, + "set resolution quirk: cval->res = 16\n"); + cval->res = 16; + } + break; } } diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 73abc38a5400689423497d266c587371923aaa66..aaa6a515d0f8a4b22691c0862a26841811804f26 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL), YAMAHA_DEVICE(0x105b, NULL), YAMAHA_DEVICE(0x105c, NULL), YAMAHA_DEVICE(0x105d, NULL), +YAMAHA_DEVICE(0x1718, "P-125"), { USB_DEVICE(0x0499, 0x1503), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { @@ -2594,6 +2595,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Stanton ScratchAmp */ +{ USB_DEVICE(0x103d, 0x0100) }, +{ USB_DEVICE(0x103d, 0x0101) }, + /* Novation EMS devices */ { USB_DEVICE_VENDOR_SPEC(0x1235, 0x0001), diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 58156fbca02c78a76e15473380923733924d490f..e7b68c67852e92ef851d7f820dfd6c522e2c0e24 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2125,6 +2125,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ @@ -2167,6 +2169,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2281, /* HD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ @@ -2217,6 +2221,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/sound/usb/stream.c b/sound/usb/stream.c index d5409f3879455961182713d24327836219e73457..e14c725acebf2c1dbf69f56b421081a3beb1b1cb 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -244,8 +244,8 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ - SNDRV_CHMAP_SL, /* left surround */ - SNDRV_CHMAP_SR, /* right surround */ + SNDRV_CHMAP_RL, /* left surround */ + SNDRV_CHMAP_RR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7b32b99023a21d3abc9f86e7b6acd7dc861793b2..5fd7caea441936246ce9ddb7b5d7bdf04b30d14b 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -86,9 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a9c1a4df9384c4ff02ecd8ce21b92..eaeda001784ebb6f21c4e64bad3b632c2b292031 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3c7434329661c66e7c34283f0a3f2c59a87f8044..dd4682857c12083c099b1aee6c47df77287bb179 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,183 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ -#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +467,60 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e022e6eb766c64050aedcefc5b8962c2049184c6..82c6a4d350e09e6d8f98f11189cc5cb8f5e58a7c 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -566,6 +566,12 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_STEP 4 + /* KeyID partitioning between MKTME and TDX */ #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 @@ -660,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -781,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -1164,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529caf53c5c7dd1823fd7a3f320637c..bf57a824f72281218a7c145e44587f3845a50aad 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337dfe064d809966a309cecd0930070..1814b413fd5783d295a7c85bf67f27835467a4ce 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 489cbed7e82a7d59aacc5923d9c769a400788ab2..12796808f07a8c0d21d1ebdeddba8d6cf688c25a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -82,7 +82,30 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, are required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + endif + endif +endif all: $(FILES) @@ -147,7 +170,17 @@ $(OUTPUT)test-libopencsd.bin: DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) -DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif $(OUTPUT)test-dwarf.bin: @@ -178,27 +211,27 @@ $(OUTPUT)test-numa_num_possible_cpus.bin: $(BUILD) -lnuma $(OUTPUT)test-libunwind.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-debug-frame.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-x86.bin: - $(BUILD) -lelf -lunwind-x86 + $(BUILD) -lelf -llzma -lunwind-x86 $(OUTPUT)test-libunwind-x86_64.bin: - $(BUILD) -lelf -lunwind-x86_64 + $(BUILD) -lelf -llzma -lunwind-x86_64 $(OUTPUT)test-libunwind-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libunwind-debug-frame-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-debug-frame-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libaudit.bin: $(BUILD) -laudit diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 210c13b1b8570b46aab703a3383ef5947e8fa1f5..2a7f260ef9dc5395aa0f5d885d325fa48f857289 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -19,7 +19,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); -void bitmap_clear(unsigned long *map, unsigned int start, int len); +void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); @@ -150,4 +150,19 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static inline void bitmap_clear(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __clear_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map &= ~GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0, nbits / 8); + else + __bitmap_clear(map, start, nbits); +} #endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 0000000000000000000000000000000000000000..7147b1b2cb2850eec4a5e71de8e9addd0709a871 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d027235a4a27760cb9aac33048ab641420..5bf6148cac2b93e36f362236545e2e6eb473d5ff 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e0777e7173ccfc9deb30d5ef10d3cc8..535cb68fdb5c4b49b3ae4784338b66e5bb18268f 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa66bcbc10f9b4adc08f2a61db377b..d358add1611cd13d034b35d9a19cb3ebcc3e388c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc063de39a083db4c8694376de4f3f1..637efc05514534ca635671f64f84bfd65a1b8ea1 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d63734d632ab03cd1966211473d8c..4842c36fdf80199696b9e4a77d363fbcd9f3996e 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d53531664d0a65fbcf62585204e41588d88..887a2528644168a3f0f16752e6b21958a89e0bac 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index c3e4871967bcf9e163f82203fcb3281b5a77c2d0..2178862bb114cbad1da670be63ad1fdb80836c0c 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -100,3 +100,23 @@ bool __bitmap_intersects(const unsigned long *bitmap1, return true; return false; } + +void __bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index 3766886c4bca3d967ed2e714982be5016c4816f4..83dc87c662b63ecc17553a15cc15a6b8d6f01d83 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime. $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a If UBSan detects any problem at runtime, it outputs a “runtime error:” message. + +4) Cross compilation +==================== +As Multiarch is commonly supported in Linux distributions, we can install +libraries for multiple architectures on the same system and then cross-compile +Linux perf. For example, Aarch64 libraries and toolchains can be installed on +an x86_64 machine, allowing us to compile perf for an Aarch64 target. + +Below is the command for building the perf with dynamic linking. + + $ cd /path/to/Linux + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +For static linking, the option `LDFLAGS="-static"` is required. + + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ + LDFLAGS="-static" -C tools/perf + +In the embedded system world, a use case is to explicitly specify the package +configuration paths for cross building: + + $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \ + PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \ + make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the +variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to +the library paths for cross compilation. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c896babf7a740e2bb4fb2c50a4a9f9f6998ee315..fa679db61f6226ef838de3b1653341562960a9e3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -152,7 +152,17 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2 + DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) @@ -296,6 +306,11 @@ endif ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) + # Update the python flags for cross compilation + ifdef CROSS_COMPILE + PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/') + PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS)) + endif PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) @@ -897,6 +912,9 @@ else PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no") ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes) PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])') + ifdef CROSS_COMPILE + PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX)) + endif LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX) else $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 175e4c7898f0ed58b11540c3a68f9559d7c4b5bb..f8148db5fc389b2dc2940029dda14eed4a9c156d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -193,7 +193,32 @@ HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -PKG_CONFIG = $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, is required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,) + $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.) + endif + endif +endif RM = rm -f LN = ln -f diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21c6590267499335d6a911e678e27e..ebae8415dfbbab6f6f0802913e7d0ef5de37ac70 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10a3d13ef6734512e52b9048d98692..01071182763e96ff03b5d74e4b9c4387ab84c456 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5bf97df25666a1048d0f285b8d465e..7093ee21c0d1c0410dbf17a77cb70a6bc6736f04 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# +# [ [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index de76bbc50bfbcbb6323bef3f67a8953c1978d03e..5c9335fff2d396b435ba877b82c2494e049858ea 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -688,7 +689,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* lock */ csv_sep, daemon->base, "lock"); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - daemon->start) / 60); @@ -700,7 +701,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) daemon->base, SESSION_OUTPUT); fprintf(out, " lock: %s/lock\n", daemon->base); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - daemon->start) / 60); } } @@ -727,7 +728,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* session ack */ csv_sep, session->base, SESSION_ACK); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - session->start) / 60); @@ -745,7 +746,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) session->base, SESSION_CONTROL); fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - session->start) / 60); } } diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json index a9939823b14b5a89a2bb8dc337c6e69dc90821d6..0c9b9a2d2958a427e64f0d3b6de4ec7a366d05eb 100644 --- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -74,7 +74,7 @@ { "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", "ConfigCode": "0x800000000000000c", - "EventName": "FW_SFENCE_VMA_RECEIVED", + "EventName": "FW_SFENCE_VMA_ASID_SENT", "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" }, { diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370bd412c30a755bd1324f5f8c72aad1..df9cdb8bbfb88428e1d2d6a2d16c7d1abe9ae9fc 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd6643ce3a83711cb295c711dd67ca511..7539717707337a8cb22396a869baba3bafa08371 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc//maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc//maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad007341f70a8816aa506216ffea89ec..225bc366ffcbf0319929e2f55f1fbea88e4d7b81 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d53531664d0a65fbcf62585204e41588d88..887a2528644168a3f0f16752e6b21958a89e0bac 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 628d46a0da92eb0393dd592a38e987d08dcf6db0..8bf7e8a0eb6f03a8026e07f1968568fb099e7c9f 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1730b852a947400ff54cd75d8ec60fa3565fab61..6d075648d2ccf43c464461547ba5562edb385a69 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index b1e6817f1e54abe69823cb9fa7aaa8916dc6e65b..3946d5254a1fd728096fc8432e8f7f5c5f335765 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -46,6 +46,7 @@ snapshot: turbostat @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h @echo PWD=. > $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 8d37acd3920156c16e1f6777221b5f7e360a5a42..067717bce1d4ab6ff21728e6765ce94446338f7c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -28,10 +28,13 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP | \fBperf//\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute + is a perf device from /sys/bus/event_source/devices/ eg. cstate_core + is a perf event for given device from /sys/bus/event_source/devices//events/ eg. c1-residency + perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. @@ -52,6 +55,39 @@ name as necessary to disambiguate it from others is necessary. Note that option as the column header. .fi .PP +\fB--add pmt,[attr_name=attr_value, ...]\fP add column with a PMT (Intel Platform Monitoring Technology) counter in a similar way to --add option above, but require PMT metadata to be supplied to correctly read and display the counter. The metadata can be found in the Intel PMT XML files, hosted at https://github.com/intel/Intel-PMT. For a complete example see "ADD PMT COUNTER EXAMPLE". +.nf + name="name_string" + For column header. + + type={\fBraw\fP} + 'raw' shows the counter contents in hex. + default: raw + + format={\fBraw\fP | \fBdelta\fP} + 'raw' shows the counter contents in hex. + 'delta' shows the difference in values during the measurement interval. + default: raw + + domain={\fBcpu%u\fP | \fBcore%u\fP | \fBpackage%u\fP} + 'cpu' per cpu/thread counter. + 'core' per core counter. + 'package' per package counter. + '%u' denotes id of the domain that the counter is associated with. For example core4 would mean that the counter is associated with core number 4. + + offset=\fB%u\fP + '%u' offset within the PMT MMIO region. + + lsb=\fB%u\fP + '%u' least significant bit within the 64 bit value read from 'offset'. Together with 'msb', used to form a read mask. + + msb=\fB%u\fP + '%u' most significant bit within the 64 bit value read from 'offset'. Together with 'lsb', used to form a read mask. + + guid=\fB%x\fP + '%x' hex identifier of the PMT MMIO region. +.fi +.PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. @@ -67,10 +103,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--quiet\fP Do not decode and print the system configuration header information. .PP -+\fB--no-msr\fP Disable all the uses of the MSR driver. -+.PP -+\fB--no-perf\fP Disable all the uses of the perf API. -+.PP +\fB--no-msr\fP Disable all the uses of the MSR driver. +.PP +\fB--no-perf\fP Disable all the uses of the perf API. +.PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP \fB--num_iterations num\fP number of the measurement iterations. @@ -320,7 +356,7 @@ available on all processors. Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), labeling it with the column header, "PRF_CTRL", and display it only once, -afte the conclusion of a 0.1 second sleep. +after the conclusion of a 0.1 second sleep. .nf sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 0.101604 sec @@ -333,6 +369,56 @@ CPU PRF_CTRL .fi +.SH ADD PERF COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing time spent in C1 core cstate, +labeling it with the column header, "pCPU%c1", and display it only once, +after the conclusion of 0.1 second sleep. +We also show CPU%c1 built-in counter that should show similar values. +.nf +sudo ./turbostat --quiet --cpu 0-3 --show CPU,CPU%c1 --add perf/cstate_core/c1-residency,cpu,delta,percent,pCPU%c1 sleep .1 +0.102448 sec +CPU pCPU%c1 CPU%c1 +- 34.89 34.89 +0 45.99 45.99 +1 45.94 45.94 +2 23.83 23.83 +3 23.84 23.84 + +.fi + +.SH ADD PMT COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number 0. +We add two counters, showing crystal clock count and the DC6 residency. +All the parameters passed are based on the metadata found in the PMT XML files. + +For the crystal clock count, we +label it with the column header, "XTAL", +we set the type to 'raw', to read the number of clock ticks in hex, +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +For the DC6 residency counter, we +label it with the column header, "Die%c6", +we set the type to 'txtal_time', to obtain the percent residency value +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +.nf +sudo ./turbostat --quiet --cpu 0 --show CPU --add pmt,name=XTAL,type=raw,format=delta,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102 --add pmt,name=Die%c6,type=txtal_time,format=delta,domain=package0,offset=120,lsb=0,msb=63,guid=0x1a067102 +0.104352 sec +CPU XTAL Die%c6 +- 0x0000006d4d957ca7 0.00 +0 0x0000006d4d957ca7 0.00 +0.102448 sec +.fi + .SH INPUT For interval-mode, turbostat will immediately end the current interval diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9f5d053d4bc639223e7c7f8c3796e6c93574caea..089220aaa5c9291b5e0432ec1d0fa9b3f44b3e1a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9,6 +9,30 @@ #define _GNU_SOURCE #include MSRHEADER + +// copied from arch/x86/include/asm/cpu_device_id.h +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) +// end copied section + +#define X86_VENDOR_INTEL 0 + #include INTEL_FAMILY_HEADER #include BUILD_BUG_HEADER #include @@ -20,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -55,15 +80,39 @@ */ #define NAME_BYTES 20 #define PATH_BYTES 128 +#define PERF_NAME_BYTES 128 #define MAX_NOFILE 0x8000 +#define COUNTER_KIND_PERF_PREFIX "perf/" +#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) +#define PERF_DEV_NAME_BYTES 32 +#define PERF_EVT_NAME_BYTES 32 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; -enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; -enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; -enum cstate_source { CSTATE_SOURCE_NONE, CSTATE_SOURCE_PERF, CSTATE_SOURCE_MSR }; +enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; + +struct perf_counter_info { + struct perf_counter_info *next; + + /* How to open the counter / What counter it is. */ + char device[PERF_DEV_NAME_BYTES]; + char event[PERF_EVT_NAME_BYTES]; + + /* How to show/format the counter. */ + char name[PERF_NAME_BYTES]; + unsigned int width; + enum counter_scope scope; + enum counter_type type; + enum counter_format format; + double scale; + + /* For reading the counter. */ + int *fd_perf_per_domain; + size_t num_domains; +}; struct sysfs_path { char path[PATH_BYTES]; @@ -144,6 +193,7 @@ struct msr_counter bic[] = { { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -205,11 +255,12 @@ struct msr_counter bic[] = { #define BIC_SAM_mc6 (1ULL << 55) #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) +#define BIC_Diec6 (1ULL << 58) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) +#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -252,7 +303,6 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; -struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */ struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -267,6 +317,7 @@ unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int has_aperf; +unsigned int has_aperf_access; unsigned int has_epb; unsigned int has_turbo; unsigned int is_hybrid; @@ -307,7 +358,6 @@ unsigned int first_counter_read = 1; int ignore_stdin; bool no_msr; bool no_perf; -enum amperf_source amperf_source; enum gfx_sysfs_idx { GFX_rc6, @@ -367,7 +417,7 @@ struct platform_features { }; struct platform_data { - unsigned int model; + unsigned int vfm; const struct platform_features *features; }; @@ -910,75 +960,75 @@ static const struct platform_features amd_features_with_rapl = { }; static const struct platform_data turbostat_pdata[] = { - { INTEL_FAM6_NEHALEM, &nhm_features }, - { INTEL_FAM6_NEHALEM_G, &nhm_features }, - { INTEL_FAM6_NEHALEM_EP, &nhm_features }, - { INTEL_FAM6_NEHALEM_EX, &nhx_features }, - { INTEL_FAM6_WESTMERE, &nhm_features }, - { INTEL_FAM6_WESTMERE_EP, &nhm_features }, - { INTEL_FAM6_WESTMERE_EX, &nhx_features }, - { INTEL_FAM6_SANDYBRIDGE, &snb_features }, - { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, - { INTEL_FAM6_IVYBRIDGE, &ivb_features }, - { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, - { INTEL_FAM6_HASWELL, &hsw_features }, - { INTEL_FAM6_HASWELL_X, &hsx_features }, - { INTEL_FAM6_HASWELL_L, &hswl_features }, - { INTEL_FAM6_HASWELL_G, &hswg_features }, - { INTEL_FAM6_BROADWELL, &bdw_features }, - { INTEL_FAM6_BROADWELL_G, &bdwg_features }, - { INTEL_FAM6_BROADWELL_X, &bdx_features }, - { INTEL_FAM6_BROADWELL_D, &bdx_features }, - { INTEL_FAM6_SKYLAKE_L, &skl_features }, - { INTEL_FAM6_SKYLAKE, &skl_features }, - { INTEL_FAM6_SKYLAKE_X, &skx_features }, - { INTEL_FAM6_KABYLAKE_L, &skl_features }, - { INTEL_FAM6_KABYLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE_L, &skl_features }, - { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_X, &icx_features }, - { INTEL_FAM6_ICELAKE_D, &icx_features }, - { INTEL_FAM6_ICELAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, - { INTEL_FAM6_ROCKETLAKE, &cnl_features }, - { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, - { INTEL_FAM6_TIGERLAKE, &cnl_features }, - { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, - { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, - { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, - { INTEL_FAM6_LAKEFIELD, &cnl_features }, - { INTEL_FAM6_ALDERLAKE, &adl_features }, - { INTEL_FAM6_ALDERLAKE_L, &adl_features }, - { INTEL_FAM6_RAPTORLAKE, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_P, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, - { INTEL_FAM6_METEORLAKE, &cnl_features }, - { INTEL_FAM6_METEORLAKE_L, &cnl_features }, - { INTEL_FAM6_ARROWLAKE_H, &arl_features }, - { INTEL_FAM6_ARROWLAKE_U, &arl_features }, - { INTEL_FAM6_ARROWLAKE, &arl_features }, - { INTEL_FAM6_LUNARLAKE_M, &arl_features }, - { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, - { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, - { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, - { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, - { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, - { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, - { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, - { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, - { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, - { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, - { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, - { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, - { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, - { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, + { INTEL_NEHALEM, &nhm_features }, + { INTEL_NEHALEM_G, &nhm_features }, + { INTEL_NEHALEM_EP, &nhm_features }, + { INTEL_NEHALEM_EX, &nhx_features }, + { INTEL_WESTMERE, &nhm_features }, + { INTEL_WESTMERE_EP, &nhm_features }, + { INTEL_WESTMERE_EX, &nhx_features }, + { INTEL_SANDYBRIDGE, &snb_features }, + { INTEL_SANDYBRIDGE_X, &snx_features }, + { INTEL_IVYBRIDGE, &ivb_features }, + { INTEL_IVYBRIDGE_X, &ivx_features }, + { INTEL_HASWELL, &hsw_features }, + { INTEL_HASWELL_X, &hsx_features }, + { INTEL_HASWELL_L, &hswl_features }, + { INTEL_HASWELL_G, &hswg_features }, + { INTEL_BROADWELL, &bdw_features }, + { INTEL_BROADWELL_G, &bdwg_features }, + { INTEL_BROADWELL_X, &bdx_features }, + { INTEL_BROADWELL_D, &bdx_features }, + { INTEL_SKYLAKE_L, &skl_features }, + { INTEL_SKYLAKE, &skl_features }, + { INTEL_SKYLAKE_X, &skx_features }, + { INTEL_KABYLAKE_L, &skl_features }, + { INTEL_KABYLAKE, &skl_features }, + { INTEL_COMETLAKE, &skl_features }, + { INTEL_COMETLAKE_L, &skl_features }, + { INTEL_CANNONLAKE_L, &cnl_features }, + { INTEL_ICELAKE_X, &icx_features }, + { INTEL_ICELAKE_D, &icx_features }, + { INTEL_ICELAKE_L, &cnl_features }, + { INTEL_ICELAKE_NNPI, &cnl_features }, + { INTEL_ROCKETLAKE, &cnl_features }, + { INTEL_TIGERLAKE_L, &cnl_features }, + { INTEL_TIGERLAKE, &cnl_features }, + { INTEL_SAPPHIRERAPIDS_X, &spr_features }, + { INTEL_EMERALDRAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_LAKEFIELD, &cnl_features }, + { INTEL_ALDERLAKE, &adl_features }, + { INTEL_ALDERLAKE_L, &adl_features }, + { INTEL_RAPTORLAKE, &adl_features }, + { INTEL_RAPTORLAKE_P, &adl_features }, + { INTEL_RAPTORLAKE_S, &adl_features }, + { INTEL_METEORLAKE, &cnl_features }, + { INTEL_METEORLAKE_L, &cnl_features }, + { INTEL_ARROWLAKE_H, &arl_features }, + { INTEL_ARROWLAKE_U, &arl_features }, + { INTEL_ARROWLAKE, &arl_features }, + { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_ATOM_SILVERMONT, &slv_features }, + { INTEL_ATOM_SILVERMONT_D, &slvd_features }, + { INTEL_ATOM_AIRMONT, &amt_features }, + { INTEL_ATOM_GOLDMONT, &gmt_features }, + { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, + { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, + { INTEL_ATOM_TREMONT_D, &tmtd_features }, + { INTEL_ATOM_TREMONT, &tmt_features }, + { INTEL_ATOM_TREMONT_L, &tmt_features }, + { INTEL_ATOM_GRACEMONT, &adl_features }, + { INTEL_ATOM_CRESTMONT_X, &srf_features }, + { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_XEON_PHI_KNL, &knl_features }, + { INTEL_XEON_PHI_KNM, &knl_features }, /* * Missing support for - * INTEL_FAM6_ICELAKE - * INTEL_FAM6_ATOM_SILVERMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_NP + * INTEL_ICELAKE + * INTEL_ATOM_SILVERMONT_MID + * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, }; @@ -1003,11 +1053,11 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } - if (!genuine_intel || family != 6) + if (!genuine_intel) return; for (i = 0; turbostat_pdata[i].features; i++) { - if (turbostat_pdata[i].model == model) { + if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { platform = turbostat_pdata[i].features; return; } @@ -1034,8 +1084,13 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 +#define PMT_MAX_ADDED_THREAD_COUNTERS 24 +#define PMT_MAX_ADDED_CORE_COUNTERS 8 +#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 #define BITMASK_SIZE 32 +#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) + /* Indexes used to map data read from perf and MSRs into global variables */ enum rapl_rci_index { RAPL_RCI_INDEX_ENERGY_PKG = 0, @@ -1056,19 +1111,13 @@ enum rapl_unit { struct rapl_counter_info_t { unsigned long long data[NUM_RAPL_COUNTERS]; - enum rapl_source source[NUM_RAPL_COUNTERS]; + enum counter_source source[NUM_RAPL_COUNTERS]; unsigned long long flags[NUM_RAPL_COUNTERS]; double scale[NUM_RAPL_COUNTERS]; enum rapl_unit unit[NUM_RAPL_COUNTERS]; - - union { - /* Active when source == RAPL_SOURCE_MSR */ - struct { - unsigned long long msr[NUM_RAPL_COUNTERS]; - unsigned long long msr_mask[NUM_RAPL_COUNTERS]; - int msr_shift[NUM_RAPL_COUNTERS]; - }; - }; + unsigned long long msr[NUM_RAPL_COUNTERS]; + unsigned long long msr_mask[NUM_RAPL_COUNTERS]; + int msr_shift[NUM_RAPL_COUNTERS]; int fd_perf; }; @@ -1224,7 +1273,7 @@ enum ccstate_rci_index { struct cstate_counter_info_t { unsigned long long data[NUM_CSTATE_COUNTERS]; - enum cstate_source source[NUM_CSTATE_COUNTERS]; + enum counter_source source[NUM_CSTATE_COUNTERS]; unsigned long long msr[NUM_CSTATE_COUNTERS]; int fd_perf_core; int fd_perf_pkg; @@ -1361,6 +1410,167 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { }, }; +/* Indexes used to map data read from perf and MSRs into global variables */ +enum msr_rci_index { + MSR_RCI_INDEX_APERF = 0, + MSR_RCI_INDEX_MPERF = 1, + MSR_RCI_INDEX_SMI = 2, + NUM_MSR_COUNTERS, +}; + +struct msr_counter_info_t { + unsigned long long data[NUM_MSR_COUNTERS]; + enum counter_source source[NUM_MSR_COUNTERS]; + unsigned long long msr[NUM_MSR_COUNTERS]; + unsigned long long msr_mask[NUM_MSR_COUNTERS]; + int fd_perf; +}; + +struct msr_counter_info_t *msr_counter_info; +unsigned int msr_counter_info_size; + +struct msr_counter_arch_info { + const char *perf_subsys; + const char *perf_name; + unsigned long long msr; + unsigned long long msr_mask; + unsigned int rci_index; /* Maps data from perf counters to global variables */ + bool needed; + bool present; +}; + +enum msr_arch_info_index { + MSR_ARCH_INFO_APERF_INDEX = 0, + MSR_ARCH_INFO_MPERF_INDEX = 1, + MSR_ARCH_INFO_SMI_INDEX = 2, +}; + +static struct msr_counter_arch_info msr_counter_arch_infos[] = { + [MSR_ARCH_INFO_APERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "aperf", + .msr = MSR_IA32_APERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_APERF, + }, + + [MSR_ARCH_INFO_MPERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "mperf", + .msr = MSR_IA32_MPERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_MPERF, + }, + + [MSR_ARCH_INFO_SMI_INDEX] = { + .perf_subsys = "msr", + .perf_name = "smi", + .msr = MSR_SMI_COUNT, + .msr_mask = 0xFFFFFFFF, + .rci_index = MSR_RCI_INDEX_SMI, + }, +}; + +/* Can be redefined when compiling, useful for testing. */ +#ifndef SYSFS_TELEM_PATH +#define SYSFS_TELEM_PATH "/sys/class/intel_pmt" +#endif + +#define PMT_COUNTER_MTL_DC6_OFFSET 120 +#define PMT_COUNTER_MTL_DC6_LSB 0 +#define PMT_COUNTER_MTL_DC6_MSB 63 +#define PMT_MTL_DC6_GUID 0x1a067102 + +#define PMT_COUNTER_NAME_SIZE_BYTES 16 +#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 + +struct pmt_mmio { + struct pmt_mmio *next; + + unsigned int guid; + unsigned int size; + + /* Base pointer to the mmaped memory. */ + void *mmio_base; + + /* + * Offset to be applied to the mmio_base + * to get the beginning of the PMT counters for given GUID. + */ + unsigned long pmt_offset; +} *pmt_mmios; + +enum pmt_datatype { + PMT_TYPE_RAW, + PMT_TYPE_XTAL_TIME, +}; + +struct pmt_domain_info { + /* + * Pointer to the MMIO obtained by applying a counter offset + * to the mmio_base of the mmaped region for the given GUID. + * + * This is where to read the raw value of the counter from. + */ + unsigned long *pcounter; +}; + +struct pmt_counter { + struct pmt_counter *next; + + /* PMT metadata */ + char name[PMT_COUNTER_NAME_SIZE_BYTES]; + enum pmt_datatype type; + enum counter_scope scope; + unsigned int lsb; + unsigned int msb; + + /* BIC-like metadata */ + enum counter_format format; + + unsigned int num_domains; + struct pmt_domain_info *domains; +}; + +unsigned int pmt_counter_get_width(const struct pmt_counter *p) +{ + return (p->msb - p->lsb) + 1; +} + +void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) +{ + struct pmt_domain_info *new_mem; + + new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); + if (!new_mem) { + fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); + exit(1); + } + + /* Zero initialize just allocated memory. */ + const size_t num_new_domains = new_size - pcounter->num_domains; + + memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); + + pcounter->num_domains = new_size; + pcounter->domains = new_mem; +} + +void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) +{ + /* + * Allocate more memory ahead of time. + * + * Always allocate space for at least 8 elements + * and double the size when growing. + */ + if (new_size < 8) + new_size = 8; + new_size = MAX(new_size, pcounter->num_domains * 2); + + pmt_counter_resize_(pcounter, new_size); +} + struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -1378,6 +1588,8 @@ struct thread_data { unsigned int flags; bool is_atom; unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -1391,6 +1603,8 @@ struct core_data { unsigned int core_id; unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -1423,7 +1637,10 @@ struct pkg_data { struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned int uncore_mhz; + unsigned long long die_c6; unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -1558,12 +1775,25 @@ int idx_valid(int idx) } struct sys_counters { + /* MSR added counters */ unsigned int added_thread_counters; unsigned int added_core_counters; unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; + + /* perf added counters */ + unsigned int added_thread_perf_counters; + unsigned int added_core_perf_counters; + unsigned int added_package_perf_counters; + struct perf_counter_info *perf_tp; + struct perf_counter_info *perf_cp; + struct perf_counter_info *perf_pp; + + struct pmt_counter *pmt_tp; + struct pmt_counter *pmt_cp; + struct pmt_counter *pmt_pp; } sys; static size_t free_msr_counters_(struct msr_counter **pp) @@ -1747,7 +1977,7 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_SMI | BIC_Mod_c6 | BIC_CoreTmp | + const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; bic_enabled &= ~bic_msrs; @@ -1823,6 +2053,23 @@ int probe_msr(int cpu, off_t offset) return 0; } +/* Convert CPU ID to domain ID for given added perf counter. */ +unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) +{ + switch (pc->scope) { + case SCOPE_CPU: + return cpu; + + case SCOPE_CORE: + return cpus[cpu].physical_core_id; + + case SCOPE_PACKAGE: + return cpus[cpu].physical_package_id; + } + + __builtin_unreachable(); +} + #define MAX_DEFERRED 16 char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; @@ -1846,9 +2093,12 @@ void help(void) "to print statistics, until interrupted.\n" " -a, --add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" + " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " debug messages are printed to stderr\n" " -D, --Dump displays the raw counter values\n" " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" @@ -1955,6 +2205,8 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) void print_header(char *delim) { struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; int printed = 0; if (DO_BIC(BIC_USEC)) @@ -2012,6 +2264,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_tp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_tp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3)) @@ -2052,6 +2338,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_cp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_cp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2096,6 +2416,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_Diec6)) + outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_LPI)) outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); if (DO_BIC(BIC_SYS_LPI)) @@ -2147,6 +2469,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_pp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_pp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + outp += sprintf(outp, "\n"); } @@ -2267,6 +2623,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *fmt8; int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; char *delim = "\t"; int printed = 0; @@ -2404,6 +2762,51 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + /* Added perf counters */ + for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + if (pp->type == COUNTER_USEC) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + t->perf_counter[i] / interval_float / 10000); + else + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + } + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = t->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + /* C1 */ if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); @@ -2447,6 +2850,44 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); + } + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = c->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) @@ -2526,6 +2967,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + if (DO_BIC(BIC_Diec6)) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) outp += @@ -2601,6 +3046,47 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); + } else if (pp->type == COUNTER_K2M) { + outp += + sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + } + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = p->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -2654,6 +3140,8 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -2674,6 +3162,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; + old->die_c6 = new->die_c6 - old->die_c6; old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; old->sys_lpi = new->sys_lpi - old->sys_lpi; old->pkg_temp_c = new->pkg_temp_c; @@ -2714,6 +3203,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->counter[i] = new->counter[i] - old->counter[i]; } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else if (pp->format == FORMAT_AVERAGE) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2721,6 +3226,8 @@ void delta_core(struct core_data *new, struct core_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; @@ -2737,6 +3244,20 @@ void delta_core(struct core_data *new, struct core_data *old) else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } } int soft_c1_residency_display(int bic) @@ -2754,6 +3275,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* we run cpuid just the 1st time, copy the results */ if (DO_BIC(BIC_APIC)) @@ -2832,6 +3355,21 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2908,6 +3446,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; + p->die_c6 = 0; p->cpu_lpi = 0; p->sys_lpi = 0; @@ -2934,6 +3473,14 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) p->counter[i] = 0; + + memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); + memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); + memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); + + memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); + memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); + memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); } void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) @@ -2954,6 +3501,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* copy un-changing apic_id's */ if (DO_BIC(BIC_APIC)) @@ -2984,6 +3533,16 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.counter[i] += t->counter[i]; } + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.threads.perf_counter[i] += t->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] += t->pmt_counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -3004,6 +3563,16 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.counter[i] += c->counter[i]; } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.cores.perf_counter[i] += c->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] += c->pmt_counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!is_cpu_first_core_in_package(t, c, p)) return 0; @@ -3027,6 +3596,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; + average.packages.die_c6 += p->die_c6; average.packages.cpu_lpi = p->cpu_lpi; average.packages.sys_lpi = p->sys_lpi; @@ -3055,6 +3625,18 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) else average.packages.counter[i] += p->counter[i]; } + + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.perf_counter[i] = p->perf_counter[i]; + else + average.packages.perf_counter[i] += p->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] += p->pmt_counter[i]; + } + return 0; } @@ -3066,6 +3648,8 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; clear_counters(&average.threads, &average.cores, &average.packages); @@ -3108,6 +3692,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data average.packages.pc8 /= topo.allowed_packages; average.packages.pc9 /= topo.allowed_packages; average.packages.pc10 /= topo.allowed_packages; + average.packages.die_c6 /= topo.allowed_packages; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3137,6 +3722,45 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data } average.packages.counter[i] /= topo.allowed_packages; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.threads.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + continue; + } + average.threads.perf_counter[i] /= topo.allowed_cpus; + } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.cores.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.cores.perf_counter[i] /= topo.allowed_cores; + } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.packages.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.packages.perf_counter[i] /= topo.allowed_packages; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] /= topo.allowed_cpus; + } + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] /= topo.allowed_cores; + } + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] /= topo.allowed_packages; + } } static unsigned long long rdtsc(void) @@ -3382,50 +4006,66 @@ static unsigned int read_perf_counter_info_n(const char *const path, const char return v; } -static unsigned int read_msr_type(void) +static unsigned int read_perf_type(const char *subsys) { - const char *const path = "/sys/bus/event_source/devices/msr/type"; + const char *const path_format = "/sys/bus/event_source/devices/%s/type"; const char *const format = "%u"; + char path[128]; - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_aperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; - const char *const format = "event=%x"; + snprintf(path, sizeof(path), path_format, subsys); return read_perf_counter_info_n(path, format); } -static unsigned int read_mperf_config(void) +static unsigned int read_perf_config(const char *subsys, const char *event_name) { - const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; - const char *const format = "event=%x"; + const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; + FILE *fconfig = NULL; + char path[128]; + char config_str[64]; + unsigned int config; + unsigned int umask; + bool has_config = false; + bool has_umask = false; + unsigned int ret = -1; - return read_perf_counter_info_n(path, format); -} + snprintf(path, sizeof(path), path_format, subsys, event_name); -static unsigned int read_perf_type(const char *subsys) -{ - const char *const path_format = "/sys/bus/event_source/devices/%s/type"; - const char *const format = "%u"; - char path[128]; + fconfig = fopen(path, "r"); + if (!fconfig) + return -1; - snprintf(path, sizeof(path), path_format, subsys); + if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) + goto cleanup_and_exit; - return read_perf_counter_info_n(path, format); -} + for (char *pconfig_str = &config_str[0]; pconfig_str;) { + if (sscanf(pconfig_str, "event=%x", &config) == 1) { + has_config = true; + goto next; + } -static unsigned int read_rapl_config(const char *subsys, const char *event_name) -{ - const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; - const char *const format = "event=%x"; - char path[128]; + if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { + has_umask = true; + goto next; + } - snprintf(path, sizeof(path), path_format, subsys, event_name); +next: + pconfig_str = strchr(pconfig_str, ','); + if (pconfig_str) { + *pconfig_str = '\0'; + ++pconfig_str; + } + } - return read_perf_counter_info_n(path, format); + if (!has_umask) + umask = 0; + + if (has_config) + ret = (umask << 8) | config; + +cleanup_and_exit: + fclose(fconfig); + return ret; } static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) @@ -3444,7 +4084,7 @@ static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_na return RAPL_UNIT_INVALID; } -static double read_perf_rapl_scale(const char *subsys, const char *event_name) +static double read_perf_scale(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; const char *const format = "%lf"; @@ -3459,130 +4099,12 @@ static double read_perf_rapl_scale(const char *subsys, const char *event_name) return scale; } -static struct amperf_group_fd open_amperf_fd(int cpu) -{ - const unsigned int msr_type = read_msr_type(); - const unsigned int aperf_config = read_aperf_config(); - const unsigned int mperf_config = read_mperf_config(); - struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; - - fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); - fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); - - return fds; -} - -static int get_amperf_fd(int cpu) -{ - assert(fd_amperf_percpu); - - if (fd_amperf_percpu[cpu].aperf) - return fd_amperf_percpu[cpu].aperf; - - fd_amperf_percpu[cpu] = open_amperf_fd(cpu); - - return fd_amperf_percpu[cpu].aperf; -} - -/* Read APERF, MPERF and TSC using the perf API. */ -static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) -{ - union { - struct { - unsigned long nr_entries; - unsigned long aperf; - unsigned long mperf; - }; - - unsigned long as_array[3]; - } cnt; - - const int fd_amperf = get_amperf_fd(cpu); - - /* - * Read the TSC with rdtsc, because we want the absolute value and not - * the offset from the start of the counter. - */ - t->tsc = rdtsc(); - - const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); - - if (n != sizeof(cnt.as_array)) - return -2; - - t->aperf = cnt.aperf * aperf_mperf_multiplier; - t->mperf = cnt.mperf * aperf_mperf_multiplier; - - return 0; -} - -/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */ -static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu) -{ - unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; - int aperf_mperf_retry_count = 0; - - /* - * The TSC, APERF and MPERF must be read together for - * APERF/MPERF and MPERF/TSC to give accurate results. - * - * Unfortunately, APERF and MPERF are read by - * individual system call, so delays may occur - * between them. If the time to read them - * varies by a large amount, we re-read them. - */ - - /* - * This initial dummy APERF read has been seen to - * reduce jitter in the subsequent reads. - */ - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - -retry: - t->tsc = rdtsc(); /* re-read close to APERF */ - - tsc_before = t->tsc; - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - - tsc_between = rdtsc(); - - if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) - return -4; - - tsc_after = rdtsc(); - - aperf_time = tsc_between - tsc_before; - mperf_time = tsc_after - tsc_between; - - /* - * If the system call latency to read APERF and MPERF - * differ by more than 2x, then try again. - */ - if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { - aperf_mperf_retry_count++; - if (aperf_mperf_retry_count < 5) - goto retry; - else - warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); - } - aperf_mperf_retry_count = 0; - - t->aperf = t->aperf * aperf_mperf_multiplier; - t->mperf = t->mperf * aperf_mperf_multiplier; - - return 0; -} - size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) { size_t ret = 0; for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) - if (rci->source[i] == RAPL_SOURCE_PERF) + if (rci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3593,7 +4115,7 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t size_t ret = 0; for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) - if (cci->source[i] == CSTATE_SOURCE_PERF) + if (cci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3611,7 +4133,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); assert(rapl_counter_info_perdomain); @@ -3634,14 +4156,14 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { - case RAPL_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case RAPL_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(rci->fd_perf != -1); - if (debug) + if (debug >= 2) fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); @@ -3650,8 +4172,8 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct ++pi; break; - case RAPL_SOURCE_MSR: - if (debug) + case COUNTER_SOURCE_MSR: + if (debug >= 2) fprintf(stderr, "Reading rapl counter via msr at %u\n", i); assert(!no_msr); @@ -3709,15 +4231,15 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat struct cstate_counter_info_t *cci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d\n", __func__, cpu); assert(ccstate_counter_info); assert(cpu <= ccstate_counter_info_size); - memset(perf_data, 0, sizeof(perf_data)); - memset(perf_data_core, 0, sizeof(perf_data_core)); - memset(perf_data_pkg, 0, sizeof(perf_data_pkg)); + ZERO_ARRAY(perf_data); + ZERO_ARRAY(perf_data_core); + ZERO_ARRAY(perf_data_pkg); cci = &ccstate_counter_info[cpu]; @@ -3772,30 +4294,28 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { switch (cci->source[i]) { - case CSTATE_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case CSTATE_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); - } cci->data[i] = perf_data[pi]; ++pi; break; - case CSTATE_SOURCE_MSR: + case COUNTER_SOURCE_MSR: assert(!no_msr); if (get_msr(cpu, cci->msr[i], &cci->data[i])) return -13 - i; - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); - } break; } @@ -3809,7 +4329,7 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat * when invoked for the thread sibling. */ #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ - if (cci->source[index] != CSTATE_SOURCE_NONE) \ + if (cci->source[index] != COUNTER_SOURCE_NONE) \ out_counter = cci->data[index]; \ } while (0) @@ -3833,6 +4353,135 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat return 0; } +size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) +{ + size_t ret = 0; + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) + if (mci->source[i] == COUNTER_SOURCE_PERF) + ++ret; + + return ret; +} + +int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) +{ + unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; + + struct msr_counter_info_t *mci; + + if (debug >= 2) + fprintf(stderr, "%s: cpu%d\n", __func__, cpu); + + assert(msr_counter_info); + assert(cpu <= msr_counter_info_size); + + mci = &msr_counter_info[cpu]; + + ZERO_ARRAY(perf_data); + ZERO_ARRAY(mci->data); + + if (mci->fd_perf != -1) { + const size_t num_perf_counters = msr_counter_info_count_perf(mci); + const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); + const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); + + if (actual_read_size != expected_read_size) + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, + actual_read_size); + } + + for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { + switch (mci->source[i]) { + case COUNTER_SOURCE_NONE: + break; + + case COUNTER_SOURCE_PERF: + assert(pi < ARRAY_SIZE(perf_data)); + assert(mci->fd_perf != -1); + + if (debug >= 2) + fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); + + mci->data[i] = perf_data[pi]; + + ++pi; + break; + + case COUNTER_SOURCE_MSR: + assert(!no_msr); + + if (get_msr(cpu, mci->msr[i], &mci->data[i])) + return -2 - i; + + mci->data[i] &= mci->msr_mask[i]; + + if (debug >= 2) + fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); + + break; + } + } + + BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); + t->aperf = mci->data[MSR_RCI_INDEX_APERF]; + t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; + t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; + + return 0; +} + +int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) +{ + unsigned int domain; + unsigned long long value; + int fd_counter; + + for (size_t i = 0; pp; ++i, pp = pp->next) { + domain = cpu_to_domain(pp, cpu); + assert(domain < pp->num_domains); + + fd_counter = pp->fd_perf_per_domain[domain]; + + if (fd_counter == -1) + continue; + + if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) + return 1; + + assert(i < out_size); + out[i] = value * pp->scale; + } + + return 0; +} + +unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) +{ + unsigned long mask; + + if (msb == 63) + mask = 0xffffffffffffffff; + else + mask = ((1 << (msb + 1)) - 1); + + mask -= (1 << lsb) - 1; + + return mask; +} + +unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) +{ + assert(domain_id < ppmt->num_domains); + + const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; + const unsigned long value = pmmio ? *pmmio : 0; + const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); + const unsigned long value_shift = ppmt->lsb; + + return (value & value_mask) >> value_shift; +} + /* * get_counters(...) * migrate to cpu @@ -3843,6 +4492,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int cpu = t->cpu_id; unsigned long long msr; struct msr_counter *mp; + struct pmt_counter *pp; int i; int status; @@ -3858,24 +4508,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) - || soft_c1_residency_display(BIC_Avg_MHz)) { - int status = -1; - - assert(!no_perf || !no_msr); - - switch (amperf_source) { - case AMPERF_SOURCE_PERF: - status = read_aperf_mperf_tsc_perf(t, cpu); - break; - case AMPERF_SOURCE_MSR: - status = read_aperf_mperf_tsc_msr(t, cpu); - break; - } - - if (status != 0) - return status; - } + get_smi_aperf_mperf(cpu, t); if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) @@ -3883,11 +4516,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (DO_BIC(BIC_SMI)) { - if (get_msr(cpu, MSR_SMI_COUNT, &msr)) - return -5; - t->smi_count = msr & 0xFFFFFFFF; - } get_cstate_counters(cpu, t, c, p); @@ -3896,6 +4524,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) + t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); + /* collect core counters only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) goto done; @@ -3934,6 +4568,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) + c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); + /* collect package counters only for 1st core in package */ if (!is_cpu_first_core_in_package(t, c, p)) goto done; @@ -4006,6 +4646,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_mp(cpu, mp, &p->counter[i], path)) return -10; } + + if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) + p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); + done: gettimeofday(&t->tv_end, (struct timezone *)NULL); @@ -4469,25 +5116,6 @@ void free_fd_percpu(void) fd_percpu = NULL; } -void free_fd_amperf_percpu(void) -{ - int i; - - if (!fd_amperf_percpu) - return; - - for (i = 0; i < topo.max_cpu_num + 1; ++i) { - if (fd_amperf_percpu[i].mperf != 0) - close(fd_amperf_percpu[i].mperf); - - if (fd_amperf_percpu[i].aperf != 0) - close(fd_amperf_percpu[i].aperf); - } - - free(fd_amperf_percpu); - fd_amperf_percpu = NULL; -} - void free_fd_instr_count_percpu(void) { if (!fd_instr_count_percpu) @@ -4522,6 +5150,21 @@ void free_fd_cstate(void) ccstate_counter_info_size = 0; } +void free_fd_msr(void) +{ + if (!msr_counter_info) + return; + + for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { + if (msr_counter_info[cpu].fd_perf != -1) + close(msr_counter_info[cpu].fd_perf); + } + + free(msr_counter_info); + msr_counter_info = NULL; + msr_counter_info_size = 0; +} + void free_fd_rapl_percpu(void) { if (!rapl_counter_info_perdomain) @@ -4539,6 +5182,36 @@ void free_fd_rapl_percpu(void) rapl_counter_info_perdomain_size = 0; } +void free_fd_added_perf_counters_(struct perf_counter_info *pp) +{ + if (!pp) + return; + + if (!pp->fd_perf_per_domain) + return; + + while (pp) { + for (size_t domain = 0; domain < pp->num_domains; ++domain) { + if (pp->fd_perf_per_domain[domain] != -1) { + close(pp->fd_perf_per_domain[domain]); + pp->fd_perf_per_domain[domain] = -1; + } + } + + free(pp->fd_perf_per_domain); + pp->fd_perf_per_domain = NULL; + + pp = pp->next; + } +} + +void free_fd_added_perf_counters(void) +{ + free_fd_added_perf_counters_(sys.perf_tp); + free_fd_added_perf_counters_(sys.perf_cp); + free_fd_added_perf_counters_(sys.perf_pp); +} + void free_all_buffers(void) { int i; @@ -4581,9 +5254,10 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); - free_fd_amperf_percpu(); + free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); + free_fd_added_perf_counters(); free(irq_column_2_cpu); free(irqs_per_cpu); @@ -4918,16 +5592,22 @@ static void update_effective_set(bool startup) } void linux_perf_init(void); +void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void added_perf_counters_init(void); +void pmt_init(void); void re_initialize(void) { free_all_buffers(); setup_all_buffers(false); linux_perf_init(); + msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); + pmt_init(); fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } @@ -6779,22 +7459,13 @@ static int has_instr_count_access(void) return has_access; } -bool is_aperf_access_required(void) -{ - return BIC_IS_ENABLED(BIC_Avg_MHz) - || BIC_IS_ENABLED(BIC_Busy) - || BIC_IS_ENABLED(BIC_Bzy_MHz) - || BIC_IS_ENABLED(BIC_IPC) - || BIC_IS_ENABLED(BIC_CPU_c1); -} - int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { if (no_perf) return -1; - const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name); + const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); if (scale == 0.0) return -1; @@ -6805,7 +7476,7 @@ int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struc return -1; const unsigned int rapl_type = read_perf_type(cai->perf_subsys); - const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); @@ -6826,7 +7497,7 @@ int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct { int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; @@ -6846,14 +7517,6 @@ void linux_perf_init(void) if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } - - const bool aperf_required = is_aperf_access_required(); - - if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) { - fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu)); - if (fd_amperf_percpu == NULL) - err(-1, "calloc fd_amperf_percpu"); - } } void rapl_perf_init(void) @@ -6875,7 +7538,7 @@ void rapl_perf_init(void) rci->fd_perf = -1; for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { rci->data[i] = 0; - rci->source[i] = RAPL_SOURCE_NONE; + rci->source[i] = COUNTER_SOURCE_NONE; } } @@ -6917,14 +7580,14 @@ void rapl_perf_init(void) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { - rci->source[cai->rci_index] = RAPL_SOURCE_PERF; + rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; rci->scale[cai->rci_index] = scale * cai->compat_scale; rci->unit[cai->rci_index] = unit; rci->flags[cai->rci_index] = cai->flags; /* Use MSR for this counter */ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { - rci->source[cai->rci_index] = RAPL_SOURCE_MSR; + rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; rci->msr[cai->rci_index] = cai->msr; rci->msr_mask[cai->rci_index] = cai->msr_mask; rci->msr_shift[cai->rci_index] = cai->msr_shift; @@ -6934,7 +7597,7 @@ void rapl_perf_init(void) } } - if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE) + if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) has_counter = 1; } @@ -6946,75 +7609,11 @@ void rapl_perf_init(void) free(domain_visited); } -static int has_amperf_access_via_msr(void) -{ - if (no_msr) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_APERF)) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_MPERF)) - return 0; - - return 1; -} - -static int has_amperf_access_via_perf(void) -{ - struct amperf_group_fd fds; - - /* - * Cache the last result, so we don't warn the user multiple times - * - * Negative means cached, no access - * Zero means not cached - * Positive means cached, has access - */ - static int has_access_cached; - - if (no_perf) - return 0; - - if (has_access_cached != 0) - return has_access_cached > 0; - - fds = open_amperf_fd(base_cpu); - has_access_cached = (fds.aperf != -1) && (fds.mperf != -1); - - if (fds.aperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "APERF perf counter", "--no-perf"); - else - close(fds.aperf); - - if (fds.mperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "MPERF perf counter", "--no-perf"); - else - close(fds.mperf); - - if (has_access_cached == 0) - has_access_cached = -1; - - return has_access_cached > 0; -} - -/* Check if we can access APERF and MPERF */ +/* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - if (!is_aperf_access_required()) - return 0; - - if (!no_msr && has_amperf_access_via_msr()) - return 1; - - if (!no_perf && has_amperf_access_via_perf()) - return 1; - - return 0; + return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) @@ -7039,7 +7638,7 @@ int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const s return -1; const unsigned int type = read_perf_type(cai->perf_subsys); - const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); @@ -7057,13 +7656,121 @@ int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const st { int ret = add_cstate_perf_counter_(cpu, cci, cai); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; } -void cstate_perf_init_(bool soft_c1) +int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + if (no_perf) + return -1; + + const unsigned int type = read_perf_type(cai->perf_subsys); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); + + const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); + + if (fd_counter == -1) + return -1; + + /* If it's the first counter opened, make it a group descriptor */ + if (cci->fd_perf == -1) + cci->fd_perf = fd_counter; + + return fd_counter; +} + +int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + int ret = add_msr_perf_counter_(cpu, cci, cai); + + if (debug) + fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); + + return ret; +} + +void msr_perf_init_(void) +{ + const int mci_num = topo.max_cpu_num + 1; + + msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); + if (!msr_counter_info) + err(1, "calloc msr_counter_info"); + msr_counter_info_size = mci_num; + + for (int cpu = 0; cpu < mci_num; ++cpu) + msr_counter_info[cpu].fd_perf = -1; + + for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { + + struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; + + cai->present = false; + + for (int cpu = 0; cpu < mci_num; ++cpu) { + + struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; + + if (cpu_is_not_allowed(cpu)) + continue; + + if (cai->needed) { + /* Use perf API for this counter */ + if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; + cai->present = true; + + /* User MSR for this counter */ + } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; + cci->msr[cai->rci_index] = cai->msr; + cci->msr_mask[cai->rci_index] = cai->msr_mask; + cai->present = true; + } + } + } + } +} + +/* Initialize data for reading perf counters from the MSR group. */ +void msr_perf_init(void) +{ + bool need_amperf = false, need_smi = false; + const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); + + need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) + || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; + + if (BIC_IS_ENABLED(BIC_SMI)) + need_smi = true; + + /* Enable needed counters */ + msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; + + msr_perf_init_(); + + const bool has_amperf = has_amperf_access(); + const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; + + has_aperf_access = has_amperf; + + if (has_amperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + BIC_PRESENT(BIC_SMI); + } + + if (has_smi) + BIC_PRESENT(BIC_SMI); +} + +void cstate_perf_init_(bool soft_c1) { bool has_counter; bool *cores_visited = NULL, *pkg_visited = NULL; @@ -7127,17 +7834,17 @@ void cstate_perf_init_(bool soft_c1) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { - cci->source[cai->rci_index] = CSTATE_SOURCE_PERF; + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit && probe_msr(cpu, cai->msr) == 0) { - cci->source[cai->rci_index] = CSTATE_SOURCE_MSR; + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } } - if (cci->source[cai->rci_index] != CSTATE_SOURCE_NONE) { + if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { has_counter = true; cores_visited[core_id] = true; pkg_visited[pkg_id] = true; @@ -7320,12 +8027,6 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); - if (has_aperf && has_amperf_access()) { - BIC_PRESENT(BIC_Avg_MHz); - BIC_PRESENT(BIC_Busy); - BIC_PRESENT(BIC_Bzy_MHz); - BIC_PRESENT(BIC_IPC); - } do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -7442,6 +8143,11 @@ static void counter_info_init(void) if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; } + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { + msr_counter_arch_infos[i].present = false; + msr_counter_arch_infos[i].needed = false; + } } void probe_pm_features(void) @@ -7817,100 +8523,446 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); } -static void set_amperf_source(void) +bool has_added_counters(void) { - amperf_source = AMPERF_SOURCE_PERF; + /* + * It only makes sense to call this after the command line is parsed, + * otherwise sys structure is not populated. + */ - const bool aperf_required = is_aperf_access_required(); + return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; +} - if (no_perf || !aperf_required || !has_amperf_access_via_perf()) - amperf_source = AMPERF_SOURCE_MSR; +void check_msr_access(void) +{ + check_dev_msr(); + check_msr_permission(); - if (quiet || !debug) - return; + if (no_msr) + bic_disable_msr_access(); +} - fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); +void check_perf_access(void) +{ + if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) + bic_enabled &= ~BIC_IPC; } -bool has_added_counters(void) +int added_perf_counters_init_(struct perf_counter_info *pinfo) +{ + size_t num_domains = 0; + unsigned int next_domain; + bool *domain_visited; + unsigned int perf_type, perf_config; + double perf_scale; + int fd_perf; + + if (!pinfo) + return 0; + + const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); + + domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); + + while (pinfo) { + switch (pinfo->scope) { + case SCOPE_CPU: + num_domains = topo.max_cpu_num + 1; + break; + + case SCOPE_CORE: + num_domains = topo.max_core_id + 1; + break; + + case SCOPE_PACKAGE: + num_domains = topo.max_package_id + 1; + break; + } + + /* Allocate buffer for file descriptor for each domain. */ + pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); + if (!pinfo->fd_perf_per_domain) + errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); + + for (size_t i = 0; i < num_domains; ++i) + pinfo->fd_perf_per_domain[i] = -1; + + pinfo->num_domains = num_domains; + pinfo->scale = 1.0; + + memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); + + for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { + + next_domain = cpu_to_domain(pinfo, cpu); + + assert(next_domain < num_domains); + + if (cpu_is_not_allowed(cpu)) + continue; + + if (domain_visited[next_domain]) + continue; + + perf_type = read_perf_type(pinfo->device); + if (perf_type == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "type"); + continue; + } + + perf_config = read_perf_config(pinfo->device, pinfo->event); + if (perf_config == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "config"); + continue; + } + + /* Scale is not required, some counters just don't have it. */ + perf_scale = read_perf_scale(pinfo->device, pinfo->event); + if (perf_scale == 0.0) + perf_scale = 1.0; + + fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); + if (fd_perf == -1) { + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", + __func__, pinfo->device, pinfo->event, cpu); + continue; + } + + domain_visited[next_domain] = 1; + pinfo->fd_perf_per_domain[next_domain] = fd_perf; + pinfo->scale = perf_scale; + + if (debug) + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", + pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + } + + pinfo = pinfo->next; + } + + free(domain_visited); + + return 0; +} + +void added_perf_counters_init(void) +{ + if (added_perf_counters_init_(sys.perf_tp)) + errx(1, "%s: %s", __func__, "thread"); + + if (added_perf_counters_init_(sys.perf_cp)) + errx(1, "%s: %s", __func__, "core"); + + if (added_perf_counters_init_(sys.perf_pp)) + errx(1, "%s: %s", __func__, "package"); +} + +int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) +{ + int fd_telem_info; + FILE *file_telem_info; + unsigned long value; + + fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); + if (fd_telem_info == -1) + return -1; + + file_telem_info = fdopen(fd_telem_info, "r"); + if (file_telem_info == NULL) { + close(fd_telem_info); + return -1; + } + + if (fscanf(file_telem_info, format, &value) != 1) { + fclose(file_telem_info); + return -1; + } + + fclose(file_telem_info); + + *output = value; + + return 0; +} + +struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) { + DIR *dirp; + struct dirent *entry; + struct stat st; + unsigned int telem_idx; + int fd_telem_dir, fd_pmt; + unsigned long guid, size, offset; + size_t mmap_size; + void *mmio; + struct pmt_mmio *ret = NULL; + + if (stat(SYSFS_TELEM_PATH, &st) == -1) + return NULL; + + dirp = opendir(SYSFS_TELEM_PATH); + if (dirp == NULL) + return NULL; + + for (;;) { + entry = readdir(dirp); + + if (entry == NULL) + break; + + if (strcmp(entry->d_name, ".") == 0) + continue; + + if (strcmp(entry->d_name, "..") == 0) + continue; + + if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) + continue; + + if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { + break; + } + + if (!S_ISDIR(st.st_mode)) + continue; + + fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); + if (fd_telem_dir == -1) { + break; + } + + if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { + close(fd_telem_dir); + break; + } + + if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { + close(fd_telem_dir); + break; + } + + if (guid != target_guid) { + close(fd_telem_dir); + continue; + } + + if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { + close(fd_telem_dir); + break; + } + + assert(offset == 0); + + fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); + if (fd_pmt == -1) + goto loop_cleanup_and_break; + + mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); + if (mmio != MAP_FAILED) { + + if (debug) + fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); + + ret = calloc(1, sizeof(*ret)); + + if (!ret) { + fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); + exit(1); + } + + ret->guid = guid; + ret->mmio_base = mmio; + ret->pmt_offset = offset; + ret->size = size; + + ret->next = pmt_mmios; + pmt_mmios = ret; + } + +loop_cleanup_and_break: + close(fd_pmt); + close(fd_telem_dir); + break; + } + + closedir(dirp); + + return ret; +} + +struct pmt_mmio *pmt_mmio_find(unsigned int guid) +{ + struct pmt_mmio *pmmio = pmt_mmios; + + while (pmmio) { + if (pmmio->guid == guid) + return pmmio; + + pmmio = pmmio->next; + } + + return NULL; +} + +void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) +{ + char *ret; + + /* Get base of mmaped PMT file. */ + ret = (char *)pmmio->mmio_base; + /* - * It only makes sense to call this after the command line is parsed, - * otherwise sys structure is not populated. + * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. + * It's not guaranteed that the mmaped memory begins with the telemetry data + * - we might have to apply the offset first. */ + ret += pmmio->pmt_offset; - return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; + /* Apply the counter offset to get the address to the mmaped counter. */ + ret += counter_offset; + + return ret; } -bool is_msr_access_required(void) +struct pmt_mmio *pmt_add_guid(unsigned int guid) { - if (no_msr) - return false; - - if (has_added_counters()) - return true; - - return BIC_IS_ENABLED(BIC_SMI) - || BIC_IS_ENABLED(BIC_CPU_c1) - || BIC_IS_ENABLED(BIC_CPU_c3) - || BIC_IS_ENABLED(BIC_CPU_c6) - || BIC_IS_ENABLED(BIC_CPU_c7) - || BIC_IS_ENABLED(BIC_Mod_c6) - || BIC_IS_ENABLED(BIC_CoreTmp) - || BIC_IS_ENABLED(BIC_Totl_c0) - || BIC_IS_ENABLED(BIC_Any_c0) - || BIC_IS_ENABLED(BIC_GFX_c0) - || BIC_IS_ENABLED(BIC_CPUGFX) - || BIC_IS_ENABLED(BIC_Pkgpc3) - || BIC_IS_ENABLED(BIC_Pkgpc6) - || BIC_IS_ENABLED(BIC_Pkgpc2) - || BIC_IS_ENABLED(BIC_Pkgpc7) - || BIC_IS_ENABLED(BIC_Pkgpc8) - || BIC_IS_ENABLED(BIC_Pkgpc9) - || BIC_IS_ENABLED(BIC_Pkgpc10) - /* TODO: Multiplex access with perf */ - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_PkgWatt) - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_GFXWatt) - || BIC_IS_ENABLED(BIC_RAMWatt) - || BIC_IS_ENABLED(BIC_Pkg_J) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_GFX_J) - || BIC_IS_ENABLED(BIC_RAM_J) - || BIC_IS_ENABLED(BIC_PKG__) - || BIC_IS_ENABLED(BIC_RAM__) - || BIC_IS_ENABLED(BIC_PkgTmp) - || (is_aperf_access_required() && !has_amperf_access_via_perf()); + struct pmt_mmio *ret; + + ret = pmt_mmio_find(guid); + if (!ret) + ret = pmt_mmio_open(guid); + + return ret; } -void check_msr_access(void) +enum pmt_open_mode { + PMT_OPEN_TRY, /* Open failure is not an error. */ + PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ +}; + +struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) { - if (!is_msr_access_required()) - no_msr = 1; + while (pcounter) { + if (strcmp(pcounter->name, name) == 0) + break; - check_dev_msr(); - check_msr_permission(); + pcounter = pcounter->next; + } - if (no_msr) - bic_disable_msr_access(); + return pcounter; } -void check_perf_access(void) +struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) { - const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); + switch (scope) { + case SCOPE_CPU: + return &sys.pmt_tp; + case SCOPE_CORE: + return &sys.pmt_cp; + case SCOPE_PACKAGE: + return &sys.pmt_pp; + } - if (no_perf || !intrcount_required || !has_instr_count_access()) - bic_enabled &= ~BIC_IPC; + __builtin_unreachable(); +} - const bool aperf_required = is_aperf_access_required(); +void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) +{ + /* Make sure the new domain fits. */ + if (domain_id >= pcounter->num_domains) + pmt_counter_resize(pcounter, domain_id + 1); - if (!aperf_required || !has_amperf_access()) { - bic_enabled &= ~BIC_Avg_MHz; - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; - bic_enabled &= ~BIC_IPC; + assert(pcounter->domains); + assert(domain_id < pcounter->num_domains); + + pcounter->domains[domain_id].pcounter = pmmio; +} + +int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, + unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, + enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) +{ + struct pmt_mmio *mmio; + struct pmt_counter *pcounter; + struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); + bool new_counter = false; + int conflict = 0; + + if (lsb > msb) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); + exit(1); + } + + if (msb >= 64) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); + exit(1); + } + + mmio = pmt_add_guid(guid); + if (!mmio) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); + exit(1); + } + + return 1; + } + + if (offset >= mmio->size) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); + exit(1); + } + + return 1; + } + + pcounter = pmt_find_counter(*pmt_root, name); + if (!pcounter) { + pcounter = calloc(1, sizeof(*pcounter)); + new_counter = true; + } + + if (new_counter) { + strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); + pcounter->type = type; + pcounter->scope = scope; + pcounter->lsb = lsb; + pcounter->msb = msb; + pcounter->format = format; + } else { + conflict += pcounter->type != type; + conflict += pcounter->scope != scope; + conflict += pcounter->lsb != lsb; + conflict += pcounter->msb != msb; + conflict += pcounter->format != format; + } + + if (conflict) { + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", + __func__, name); + exit(1); + } + + pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); + + if (new_counter) { + pcounter->next = *pmt_root; + *pmt_root = pcounter; + } + + return 0; +} + +void pmt_init(void) +{ + if (BIC_IS_ENABLED(BIC_Diec6)) { + pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, + PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, + 0, PMT_OPEN_TRY); } } @@ -7923,16 +8975,18 @@ void turbostat_init() process_cpuid(); counter_info_init(); probe_pm_features(); - set_amperf_source(); + msr_perf_init(); linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); + pmt_init(); for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); - if (DO_BIC(BIC_IPC)) - (void)get_instr_count_fd(base_cpu); + if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) + BIC_PRESENT(BIC_IPC); /* * If TSC tweak is needed, but couldn't get it, @@ -8017,7 +9071,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.05.10 - Len Brown \n"); + fprintf(outf, "turbostat version 2024.07.26 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 @@ -8049,7 +9103,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) - printf("%s: %s %s\n", __func__, name, mp->name); + fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); if (!strncmp(name, mp->name, strlen(mp->name))) return mp; } @@ -8066,8 +9120,8 @@ int add_counter(unsigned int msr_num, char *path, char *name, errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); if (debug) - printf("%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", __func__, msr_num, - path, name, width, scope, type, format, flags, id); + fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", + __func__, msr_num, path, name, width, scope, type, format, flags, id); switch (scope) { @@ -8075,7 +9129,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.tp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { @@ -8087,7 +9141,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.cp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { @@ -8099,7 +9153,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.pp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { @@ -8116,6 +9170,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = calloc(1, sizeof(struct msr_counter)); if (msrp == NULL) err(-1, "calloc msr_counter"); + msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES - 1); msrp->width = width; @@ -8156,11 +9211,106 @@ int add_counter(unsigned int msr_num, char *path, char *name, return 0; } -void parse_add_command(char *add_command) +/* + * Initialize the fields used for identifying and opening the counter. + * + * Defer the initialization of any runtime buffers for actually reading + * the counters for when we initialize all perf counters, so we can later + * easily call re_initialize(). + */ +struct perf_counter_info *make_perf_counter_info(const char *perf_device, + const char *perf_event, + const char *name, + unsigned int width, + enum counter_scope scope, + enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + pinfo = calloc(1, sizeof(*pinfo)); + if (!pinfo) + errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); + + strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); + strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); + + strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); + pinfo->width = width; + pinfo->scope = scope; + pinfo->type = type; + pinfo->format = format; + + return pinfo; +} + +int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, + enum counter_scope scope, enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + switch (scope) { + case SCOPE_CPU: + if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { + warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_CORE: + if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { + warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_PACKAGE: + if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { + warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + } + + pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); + + if (!pinfo) + return -1; + + switch (scope) { + case SCOPE_CPU: + pinfo->next = sys.perf_tp; + sys.perf_tp = pinfo; + ++sys.added_thread_perf_counters; + break; + + case SCOPE_CORE: + pinfo->next = sys.perf_cp; + sys.perf_cp = pinfo; + ++sys.added_core_perf_counters; + break; + + case SCOPE_PACKAGE: + pinfo->next = sys.perf_pp; + sys.perf_pp = pinfo; + ++sys.added_package_perf_counters; + break; + } + + // FIXME: we might not have debug here yet + if (debug) + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", + __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + + return 0; +} + +void parse_add_command_msr(char *add_command) { int msr_num = 0; char *path = NULL; - char name_buffer[NAME_BYTES] = ""; + char perf_device[PERF_DEV_NAME_BYTES] = ""; + char perf_event[PERF_EVT_NAME_BYTES] = ""; + char name_buffer[PERF_NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; @@ -8175,6 +9325,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); + BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); + if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) + goto next; + if (*add_command == '/') { path = add_command; goto next; @@ -8222,7 +9377,8 @@ void parse_add_command(char *add_command) goto next; } - if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ + BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); + if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { char *eos; eos = strchr(name_buffer, ','); @@ -8239,21 +9395,33 @@ next: } } - if ((msr_num == 0) && (path == NULL)) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); + if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); fail++; } + /* Test for non-empty perf_device and perf_event */ + const bool is_perf_counter = perf_device[0] && perf_event[0]; + /* generate default column header */ if (*name_buffer == '\0') { - if (width == 32) - sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); - else - sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + if (is_perf_counter) { + snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); + } else { + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + } } - if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) - fail++; + if (is_perf_counter) { + if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) + fail++; + } else { + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) + fail++; + } if (fail) { help(); @@ -8261,6 +9429,195 @@ next: } } +bool starts_with(const char *str, const char *prefix) +{ + return strncmp(prefix, str, strlen(prefix)) == 0; +} + +void parse_add_command_pmt(char *add_command) +{ + char *name = NULL; + char *type_name = NULL; + char *format_name = NULL; + unsigned int offset; + unsigned int lsb; + unsigned int msb; + unsigned int guid; + unsigned int domain_id; + enum counter_scope scope = 0; + enum pmt_datatype type = PMT_TYPE_RAW; + enum counter_format format = FORMAT_RAW; + bool has_offset = false; + bool has_lsb = false; + bool has_msb = false; + bool has_format = true; /* Format has a default value. */ + bool has_guid = false; + bool has_scope = false; + bool has_type = true; /* Type has a default value. */ + + /* Consume the "pmt," prefix. */ + add_command = strchr(add_command, ','); + if (!add_command) { + help(); + exit(1); + } + ++add_command; + + while (add_command) { + if (starts_with(add_command, "name=")) { + name = add_command + strlen("name="); + goto next; + } + + if (starts_with(add_command, "type=")) { + type_name = add_command + strlen("type="); + goto next; + } + + if (starts_with(add_command, "domain=")) { + const size_t prefix_len = strlen("domain="); + + if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { + scope = SCOPE_CPU; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { + scope = SCOPE_CORE; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { + scope = SCOPE_PACKAGE; + has_scope = true; + } + + if (!has_scope) { + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", + __func__); + exit(1); + } + + goto next; + } + + if (starts_with(add_command, "format=")) { + format_name = add_command + strlen("format="); + goto next; + } + + if (sscanf(add_command, "offset=%u", &offset) == 1) { + has_offset = true; + goto next; + } + + if (sscanf(add_command, "lsb=%u", &lsb) == 1) { + has_lsb = true; + goto next; + } + + if (sscanf(add_command, "msb=%u", &msb) == 1) { + has_msb = true; + goto next; + } + + if (sscanf(add_command, "guid=%x", &guid) == 1) { + has_guid = true; + goto next; + } + +next: + add_command = strchr(add_command, ','); + if (add_command) { + *add_command = '\0'; + add_command++; + } + } + + if (!name) { + printf("%s: missing %s\n", __func__, "name"); + exit(1); + } + + if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { + printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); + exit(1); + } + + if (format_name) { + has_format = false; + + if (strcmp("raw", format_name) == 0) { + format = FORMAT_RAW; + has_format = true; + } + + if (strcmp("delta", format_name) == 0) { + format = FORMAT_DELTA; + has_format = true; + } + + if (!has_format) { + fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + exit(1); + } + } + + if (type_name) { + has_type = false; + + if (strcmp("raw", type_name) == 0) { + type = PMT_TYPE_RAW; + has_type = true; + } + + if (strcmp("txtal_time", type_name) == 0) { + type = PMT_TYPE_XTAL_TIME; + has_type = true; + } + + if (!has_type) { + printf("%s: invalid %s: %s\n", __func__, "type", type_name); + exit(1); + } + } + + if (!has_offset) { + printf("%s : missing %s\n", __func__, "offset"); + exit(1); + } + + if (!has_lsb) { + printf("%s: missing %s\n", __func__, "lsb"); + exit(1); + } + + if (!has_msb) { + printf("%s: missing %s\n", __func__, "msb"); + exit(1); + } + + if (!has_guid) { + printf("%s: missing %s\n", __func__, "guid"); + exit(1); + } + + if (!has_scope) { + printf("%s: missing %s\n", __func__, "scope"); + exit(1); + } + + if (lsb > msb) { + printf("%s: lsb > msb doesn't make sense\n", __func__); + exit(1); + } + + pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); +} + +void parse_add_command(char *add_command) +{ + if (strncmp(add_command, "pmt", strlen("pmt")) == 0) + return parse_add_command_pmt(add_command); + return parse_add_command_msr(add_command); +} + int is_deferred_add(char *name) { int i; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f05191e5b6492b97ebf96e9d45dcf3..3d1ca9e38b1fa9d2633b5c38ea41021a2865cc7c 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index eaf091a3d33137a4a09598e5d63145b4a7a4e71b..129f179b0ac563a0da627521f889b502154dcc67 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -385,19 +385,21 @@ struct cxl_test_gen_media { struct cxl_test_gen_media gen_media = { .id = CXL_EVENT_GEN_MEDIA_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_gen_media), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_gen_media), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x2000), + .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, + .transaction_type = CXL_GMER_TRANS_HOST_WRITE, + /* .validity_flags = */ + .channel = 1, + .rank = 30, }, - .phys_addr = cpu_to_le64(0x2000), - .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, - .transaction_type = CXL_GMER_TRANS_HOST_WRITE, - /* .validity_flags = */ - .channel = 1, - .rank = 30 }, }; @@ -409,18 +411,20 @@ struct cxl_test_dram { struct cxl_test_dram dram = { .id = CXL_EVENT_DRAM_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_dram), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_dram), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x8000), + .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, + .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, + /* .validity_flags = */ + .channel = 1, }, - .phys_addr = cpu_to_le64(0x8000), - .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, - .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, - /* .validity_flags = */ - .channel = 1, .bank_group = 5, .bank = 2, .column = {0xDE, 0xAD}, @@ -474,11 +478,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, - &gen_media.rec.validity_flags); + &gen_media.rec.media_hdr.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, - &dram.rec.validity_flags); + &dram.rec.media_hdr.validity_flags); mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, @@ -1131,27 +1135,28 @@ static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds) return (count >= poison_inject_dev_max); } -static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) +static int mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) { + /* Return EBUSY to match the CXL driver handling */ if (mock_poison_dev_max_injected(cxlds)) { dev_dbg(cxlds->dev, "Device poison injection limit has been reached: %d\n", - MOCK_INJECT_DEV_MAX); - return false; + poison_inject_dev_max); + return -EBUSY; } for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { if (!mock_poison_list[i].cxlds) { mock_poison_list[i].cxlds = cxlds; mock_poison_list[i].dpa = dpa; - return true; + return 0; } } dev_dbg(cxlds->dev, "Mock test poison injection limit has been reached: %d\n", MOCK_INJECT_TEST_MAX); - return false; + return -ENXIO; } static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa) @@ -1175,10 +1180,8 @@ static int mock_inject_poison(struct cxl_dev_state *cxlds, dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa); return 0; } - if (!mock_poison_add(cxlds, dpa)) - return -ENXIO; - return 0; + return mock_poison_add(cxlds, dpa); } static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa) diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e164b9611e9dac91cb9e55b69e715..d619672faa497a02c3ea71efd2f3f66144050805 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 7527f738b4a14b28d243f9e263428caf06458c9f..d1acd7d588501229d2ef16219a5218689774e787 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -5,8 +5,8 @@ CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \ LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray maple -CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \ - slab.o maple.o +LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o +CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o maple.o $(LIBS) OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ iteration_check_2.o benchmark.o diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c deleted file mode 100644 index 66ec4a24a2031b4119cc6968e884d8c27681f078..0000000000000000000000000000000000000000 --- a/tools/testing/radix-tree/bitmap.c +++ /dev/null @@ -1,23 +0,0 @@ -/* lib/bitmap.c pulls in at least two other files. */ - -#include - -void bitmap_clear(unsigned long *map, unsigned int start, int len) -{ - unsigned long *p = map + BIT_WORD(start); - const unsigned int size = start + len; - int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); - unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - - while (len - bits_to_clear >= 0) { - *p &= ~mask_to_clear; - len -= bits_to_clear; - bits_to_clear = BITS_PER_LONG; - mask_to_clear = ~0UL; - p++; - } - if (len) { - mask_to_clear &= BITMAP_LAST_WORD_MASK(size); - *p &= ~mask_to_clear; - } -} diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index 4c941270d8de0abed76a4d2ee5b1e4bbfff3108d..e4fa507cbdd0ee9c684b500d05c8d7eedb4b9817 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -156,7 +156,7 @@ static void test_hw_debug(pid_t child, int type, const char *type_name) /* Zero is not currently architecturally valid */ ksft_test_result(arch, "%s_arch_set\n", type_name); } else { - ksft_test_result_skip("%s_arch_set\n"); + ksft_test_result_skip("%s_arch_set\n", type_name); } } diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6d6d0c29db475c13d2705d8a5ae210a4248a305a..ec7d425c4022af0df958ed9fb20edcc983d85cdc 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -780,7 +780,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ + $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index bd8c75b620c29d20c358ecf5c57cd9f54d9aa9a4..c397336fe1ed1c0a6b81576e0bfde87555aab4d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -216,7 +216,7 @@ static void test_uretprobe_regs_change(void) } #ifndef __NR_uretprobe -#define __NR_uretprobe 467 +#define __NR_uretprobe 335 #endif __naked unsigned long uretprobe_syscall_call_1(void) @@ -253,7 +253,7 @@ static void test_uretprobe_syscall_call(void) struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c; - if (ASSERT_OK(pipe(go), "pipe")) + if (!ASSERT_OK(pipe(go), "pipe")) return; skel = uprobe_syscall_executed__open_and_load(); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 16bdc3e255913cb9c1f5c0c9c2ef7d38ccab6d39..ef70b88bccb252bf9a64525be27e257ba6227ee7 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1432,4 +1432,58 @@ int iter_arr_with_actual_elem_count(const void *ctx) return sum; } +__u32 upper, select_n, result; +__u64 global; + +static __noinline bool nest_2(char *str) +{ + /* some insns (including branch insns) to ensure stacksafe() is triggered + * in nest_2(). This way, stacksafe() can compare frame associated with nest_1(). + */ + if (str[0] == 't') + return true; + if (str[1] == 'e') + return true; + if (str[2] == 's') + return true; + if (str[3] == 't') + return true; + return false; +} + +static __noinline bool nest_1(int n) +{ + /* case 0: allocate stack, case 1: no allocate stack */ + switch (n) { + case 0: { + char comm[16]; + + if (bpf_get_current_comm(comm, 16)) + return false; + return nest_2(comm); + } + case 1: + return nest_2((char *)&global); + default: + return false; + } +} + +SEC("raw_tp") +__success +int iter_subprog_check_stacksafe(const void *ctx) +{ + long i; + + bpf_for(i, 0, upper) { + if (!nest_1(select_n)) { + result = 1; + return 0; + } + } + + result = 2; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 991c473e385938030340db7ed7a998c8bff62b5d..12b4eb9d04347f71f19a2c5a696632bf330cca12 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot) EXPECT_EQ(close(fd3), 0); } +TEST(close_range_bitmap_corruption) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* get the first 128 descriptors open */ + for (int i = 2; i < 128; i++) + EXPECT_GE(dup2(0, i), 0); + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* unshare and truncate descriptor table down to 64 */ + if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) + exit(EXIT_FAILURE); + + ASSERT_EQ(fcntl(64, F_GETFD), -1); + /* ... and verify that the range 64..127 is not + stuck "fully used" according to secondary bitmap */ + EXPECT_EQ(dup(0), 64) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 5f541522364fbd534ea0ec45cef119bdfa6d3dae..5d0a809dc2df9313f5b57aab753ced84a2c96d28 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -29,9 +29,11 @@ static int check_vgem(int fd) version.name = name; ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) + if (ret || version.name_len != 4) return 0; + name[4] = '\0'; + return !strcmp(name, "vgem"); } diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 931dbc36ca43d6d272798f9fdf8a0111f6a23f8c..011508ca604b086e338012eb23c568c08c4d71e8 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -19,6 +19,15 @@ def _rss_key_rand(length): return [random.randint(0, 255) for _ in range(length)] +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + def get_rss(cfg, context=0): return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] @@ -90,8 +99,9 @@ def _send_traffic_check(cfg, port, name, params): def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" - if len(_get_rx_cnts(cfg)) < 2: - KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -101,6 +111,7 @@ def test_rss_key_indir(cfg): if not data[k]: raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + _rss_key_check(cfg, data=data) key_len = len(data['rss-hash-key']) # Set the key @@ -110,9 +121,26 @@ def test_rss_key_indir(cfg): data = get_rss(cfg) ksft_eq(key, data['rss-hash-key']) + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + # Set the indirection table ethtool(f"-X {cfg.ifname} equal 2") - reset_indir = defer(ethtool, f"-X {cfg.ifname} default") data = get_rss(cfg) ksft_eq(0, min(data['rss-indirection-table'])) ksft_eq(1, max(data['rss-indirection-table'])) @@ -317,8 +345,11 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ctx_cnt = i break + _rss_key_check(cfg, context=ctx_id) + if not create_with_cfg: ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) # Sanity check the context we just created data = get_rss(cfg, ctx_id) diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index dc0408a831d07c7296ba9c0ff1ede75ff07c3481..75b7b4ef6cfaa7e324a043fb97f7a720804aff55 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -532,6 +532,7 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { + struct bpf_map *iter_map; int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) @@ -564,6 +565,13 @@ static void load_programs(const struct test_program programs[], *ops_hid_id = self->hid_id; } + /* we disable the auto-attach feature of all maps because we + * only want the tested one to be manually attached in the next + * call to bpf_map__attach_struct_ops() + */ + bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj) + bpf_map__set_autoattach(iter_map, false); + err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); @@ -686,6 +694,24 @@ TEST_F(hid_bpf, subprog_raw_event) ASSERT_EQ(buf[2], 52); } +/* + * Attach hid_first_event to the given uhid device, + * attempt at re-attaching it, we should not lock and + * return an invalid struct bpf_link + */ +TEST_F(hid_bpf, multiple_attach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + struct bpf_link *link; + + LOAD_PROGRAMS(progs); + + link = bpf_map__attach_struct_ops(self->skel->maps.first_event); + ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops"); +} + /* * Ensures that we can attach/detach programs */ diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index ee9bbbcf751b4871796518c6545faac6de603575..5ecc845ef79216d42e141e42167289109ea79267 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -455,7 +455,7 @@ struct { __type(value, struct elem); } hmap SEC(".maps"); -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *work) { __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; struct hid_bpf_ctx *hid_ctx; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index cfe37f4919062271bc3a85c16d9b68fdc0d4f474..e5db897586bbfe010d8799f6f52fc5c418344e6b 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -114,7 +114,7 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *wq), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e7f3c10a6fbe5eacd66c21bc0779a..bf215790a89d739e0df9bdc111e808d51e385af3 100644 --- a/tools/testing/selftests/kselftest/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py @@ -70,7 +70,7 @@ def test_result(condition, description=""): def finished(): - if ksft_cnt["pass"] == ksft_num_tests: + if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests: exit_code = KSFT_PASS else: exit_code = KSFT_FAIL diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 709d7d72176035fedbf2131ef89babd5f16658b0..4abebde781873b80a4fb8e72f4028d98770e553c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -32,13 +32,13 @@ static struct feature_id_reg feat_id_regs[] = { { ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 }, { ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 } }; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index f92c2fb23fcd75523dda259edf82439332e34adc..8e34f7fa44e94c1dac5741824201e2dde5b93751 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); -KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), -KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), -KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), -KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 69849acd95b0afc06ca739fd1ee03ddc5be71e58..618cd244239004ade26294d74ad5ebcd66f17b7c 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -184,6 +184,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. + */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -211,4 +238,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 3c1e9f35b53123038082818840176a99632905b3..3b26bf3cf5b9a489e037f5b11b2a635ccb55b83a 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include #include +#include #include #include #include @@ -326,4 +327,77 @@ TEST(ruleset_fd_transfer) ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); } +TEST(cred_transfer) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + int ruleset_fd, dir_fd; + pid_t child; + int status; + + drop_caps(_metadata); + + dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + EXPECT_LE(0, dir_fd); + EXPECT_EQ(0, close(dir_fd)); + + /* Denies opening directories. */ + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0, + 0, 0)) + { + TH_LOG("Failed to join session keyring: %s", strerror(errno)); + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* + * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a + * different session keyring in the child, so make that happen. + */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, + NULL, 0, 0, 0)); + + /* + * KEYCTL_SESSION_TO_PARENT installs credentials on the parent + * that never go through the cred_prepare hook, this path uses + * cred_transfer instead. + */ + EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0, + 0, 0, 0)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + _exit(_metadata->exit_code); + return; + } + + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_EQ(1, WIFEXITED(status)); + EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 0086efaa7b681c7be4866b4e4b5d9cb924fe150d..29af19c4e9f981923e231112efb5b4cf08dce3b4 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -2,6 +2,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y +CONFIG_KEYS=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 901e0d07765b6f71f288d731741487b6f99907ff..cfad627e8d94de4a7ac67bf870dc91774ce7294c 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test @@ -110,7 +112,7 @@ endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53f8b11b051f78b2e6fb7820ac3194..2c3a0eb6b22d31d5cdd1251a85ee82c401b95c15 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1b03bcfaefdfa2be6131220088ef4e9ce3fce6bb..5a3a9bcba6404853678a8840d9b9f7860b6179ee 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -22,8 +22,10 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce655a9c01c3d69200ba79a3f11269..36045edb10dea0bd6992ba6d07b74db3647fe4da 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index fa7eabfaf84159f6be4d76a6b71740c7b3f65bd1..896b3f73fc53bf83b9a54d9141f1e57be0782b0d 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -293,6 +293,20 @@ static int run_test(struct testcase *test, int count) return ret; } +#ifdef __aarch64__ +/* Check if userspace VA > 48 bits */ +static int high_address_present(void) +{ + void *ptr = mmap((void *)(1UL << 50), 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (ptr == MAP_FAILED) + return 0; + + munmap(ptr, 1); + return 1; +} +#endif + static int supported_arch(void) { #if defined(__powerpc64__) @@ -300,7 +314,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return 1; + return high_address_present(); #else return 0; #endif diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 16d0c172eaebecc81479bca91c88493929fc4bfd..535eb2c3d7d1c8675eb38efd2a6774106c221152 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, - const void *expected_buf, int expected_len, + const char *expected_buf, int expected_len, int buf_len, int flags) { int i, ret[2], recv_errno[2], expected_errno = 0; diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index d0219032f77300083c0ee27462223d6ccd10e2d7..8ee4489238ca4b19bcf6608a71256ba89472148f 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -146,6 +146,7 @@ cleanup_ns() for ns in "$@"; do [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d69d8d3aba159406ec6391b8e401f..4209b95690394b7565f330a37606bf39b6d2d228 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1115,11 +1115,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 108aeeb84ef10a9ecba6239929d9e52c642a356f..9ea6d698e9d3827fa9bfaa1ed5c922238c7847b2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -661,7 +661,7 @@ pm_nl_check_endpoint() done if [ -z "${id}" ]; then - test_fail "bad test - missing endpoint id" + fail_test "bad test - missing endpoint id" return fi @@ -1415,18 +1415,28 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1438,7 +1448,7 @@ chk_add_nr() fi print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1449,7 +1459,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1459,7 +1469,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1470,7 +1480,7 @@ chk_add_nr() fi print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1481,7 +1491,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1492,7 +1502,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1503,7 +1513,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1513,6 +1523,8 @@ chk_add_nr() print_ok fi fi + + print_info "$extra_msg" } chk_add_tx_nr() @@ -1634,6 +1646,8 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count print_check "ptx" @@ -1655,6 +1669,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + fi } chk_subflow_nr() @@ -1955,6 +1989,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 @@ -2612,11 +2661,24 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2625,20 +2687,20 @@ backup_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2647,17 +2709,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2666,7 +2737,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2676,7 +2747,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -3053,7 +3124,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3066,7 +3137,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3318,7 +3389,7 @@ userspace_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 0 - chk_prio_nr 0 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3526,6 +3597,35 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 mptcp_lib_kill_wait $tests_pid fi + + # remove and re-add + if reset "delete re-add signal" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 2 + chk_mptcp_info subflows 1 subflows 1 + mptcp_lib_kill_wait $tests_pid + fi + } # [$1: error message] diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index 47945b2b3f925835a0c27a25f8d2090e8791414e..d13fb5ea3e89425a43c879a7e051835ae5bf7443 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += br_netfilter_queue.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 0000000000000000000000000000000000000000..6a764d70ab06f984d0a07faa4791fe4741b9001b --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin < /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3e74cfa1a2bfec6bc8bea1a704e7bc074ad794a5..3f2fca02fec53f52beaddb43ea4255bca24b79b8 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -67,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -77,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -255,6 +258,13 @@ struct testcase testcases_v6[] = { .gso_len = 1, .r_num_mss = 2, }, + { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, { /* send 2B + 2B + 1B segments */ .tlen = 5, @@ -396,11 +406,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? "(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -412,6 +429,12 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e3f97f90d8db0f6fb8864d9641da44f9138c8b6b..8c3a73461475bae52436e311d4f0b723c059b664 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -60,7 +60,9 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 diff --git a/tools/testing/selftests/turbostat/added_perf_counters.py b/tools/testing/selftests/turbostat/added_perf_counters.py new file mode 100755 index 0000000000000000000000000000000000000000..9ab4aaf45fb8fbbc48fe3b46bc5f178037d0996c --- /dev/null +++ b/tools/testing/selftests/turbostat/added_perf_counters.py @@ -0,0 +1,178 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which +from os import pread + +class PerfCounterInfo: + def __init__(self, subsys, event): + self.subsys = subsys + self.event = event + + def get_perf_event_name(self): + return f'{self.subsys}/{self.event}/' + + def get_turbostat_perf_id(self, counter_scope, counter_type, column_name): + return f'perf/{self.subsys}/{self.event},{counter_scope},{counter_type},{column_name}' + +PERF_COUNTERS_CANDIDATES = [ + PerfCounterInfo('msr', 'mperf'), + PerfCounterInfo('msr', 'aperf'), + PerfCounterInfo('msr', 'tsc'), + PerfCounterInfo('cstate_core', 'c1-residency'), + PerfCounterInfo('cstate_core', 'c6-residency'), + PerfCounterInfo('cstate_core', 'c7-residency'), + PerfCounterInfo('cstate_pkg', 'c2-residency'), + PerfCounterInfo('cstate_pkg', 'c3-residency'), + PerfCounterInfo('cstate_pkg', 'c6-residency'), + PerfCounterInfo('cstate_pkg', 'c7-residency'), + PerfCounterInfo('cstate_pkg', 'c8-residency'), + PerfCounterInfo('cstate_pkg', 'c9-residency'), + PerfCounterInfo('cstate_pkg', 'c10-residency'), +] +present_perf_counters = [] + +def check_perf_access(): + perf = which('perf') + if perf is None: + print('SKIP: Could not find perf binary, thus could not determine perf access.') + return False + + def has_perf_counter_access(counter_name): + proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'], + capture_output = True) + + if proc_perf.returncode != 0: + print(f'SKIP: Could not read {counter_name} perf counter.') + return False + + if b'' in proc_perf.stderr: + print(f'SKIP: Could not read {counter_name} perf counter.') + return False + + return True + + for counter in PERF_COUNTERS_CANDIDATES: + if has_perf_counter_access(counter.get_perf_event_name()): + present_perf_counters.append(counter) + + if len(present_perf_counters) == 0: + print('SKIP: Could not read any perf counter.') + return False + + if len(present_perf_counters) != len(PERF_COUNTERS_CANDIDATES): + print(f'WARN: Could not access all of the counters - some will be left untested') + + return True + +if not check_perf_access(): + exit(0) + +turbostat_counter_source_opts = [''] + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +EXPECTED_COLUMNS_DEBUG_DEFAULT = [b'usec', b'Time_Of_Day_Seconds', b'APIC', b'X2APIC'] + +expected_columns = [b'CPU'] +counters_argv = [] +for counter in present_perf_counters: + if counter.subsys == 'cstate_core': + counter_scope = 'core' + elif counter.subsys == 'cstate_pkg': + counter_scope = 'package' + else: + counter_scope = 'cpu' + + counter_type = 'delta' + column_name = counter.event + + cparams = counter.get_turbostat_perf_id( + counter_scope = counter_scope, + counter_type = counter_type, + column_name = column_name + ) + expected_columns.append(column_name.encode()) + counters_argv.extend(['--add', cparams]) + +expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + expected_columns + +def gen_user_friendly_cmdline(argv_): + argv = argv_[:] + ret = '' + + while len(argv) != 0: + arg = argv.pop(0) + arg_next = '' + + if arg in ('-i', '--show', '--add'): + arg_next = argv.pop(0) if len(argv) > 0 else '' + + ret += f'{arg} {arg_next} \\\n\t' + + # Remove the last separator and return + return ret[:-4] + +# +# Run turbostat for some time and send SIGINT +# +timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s'] +turbostat_argv = [turbostat, '-i', '0.50', '--show', 'CPU'] + counters_argv + +def check_columns_or_fail(expected_columns: list, actual_columns: list): + if len(actual_columns) != len(expected_columns): + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) + + failed = False + for expected_column in expected_columns: + if expected_column not in actual_columns: + print(f'turbostat column check failed: missing column {expected_column.decode()}') + failed = True + + if failed: + exit(1) + +cmdline = gen_user_friendly_cmdline(turbostat_argv) +print(f'Running turbostat with:\n\t{cmdline}\n... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t') +check_columns_or_fail(expected_columns, actual_columns) +print('OK') + +# +# Same, but with --debug +# +# We explicitly specify '--show CPU' to make sure turbostat +# don't show a bunch of default counters instead. +# +turbostat_argv.append('--debug') + +cmdline = gen_user_friendly_cmdline(turbostat_argv) +print(f'Running turbostat (in debug mode) with:\n\t{cmdline}\n... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t') +check_columns_or_fail(expected_columns_debug, actual_columns) +print('OK') diff --git a/tools/testing/selftests/turbostat/smi_aperf_mperf.py b/tools/testing/selftests/turbostat/smi_aperf_mperf.py new file mode 100755 index 0000000000000000000000000000000000000000..6289cc47d5f00e29dcee4b7c206bcebf38640589 --- /dev/null +++ b/tools/testing/selftests/turbostat/smi_aperf_mperf.py @@ -0,0 +1,157 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which +from os import pread + +# CDLL calls dlopen underneath. +# Calling it with None (null), we get handle to the our own image (python interpreter). +# We hope to find sched_getcpu() inside ;] +# This is a bit ugly, but helps shipping working software, so.. +try: + import ctypes + + this_image = ctypes.CDLL(None) + BASE_CPU = this_image.sched_getcpu() +except: + BASE_CPU = 0 # If we fail, set to 0 and pray it's not offline. + +MSR_IA32_MPERF = 0x000000e7 +MSR_IA32_APERF = 0x000000e8 + +def check_perf_access(): + perf = which('perf') + if perf is None: + print('SKIP: Could not find perf binary, thus could not determine perf access.') + return False + + def has_perf_counter_access(counter_name): + proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'], + capture_output = True) + + if proc_perf.returncode != 0: + print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.') + return False + + if b'' in proc_perf.stderr: + print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.') + return False + + return True + + if not has_perf_counter_access('msr/mperf/'): + return False + if not has_perf_counter_access('msr/aperf/'): + return False + if not has_perf_counter_access('msr/smi/'): + return False + + return True + +def check_msr_access(): + try: + file_msr = open(f'/dev/cpu/{BASE_CPU}/msr', 'rb') + except: + return False + + if len(pread(file_msr.fileno(), 8, MSR_IA32_MPERF)) != 8: + return False + + if len(pread(file_msr.fileno(), 8, MSR_IA32_APERF)) != 8: + return False + + return True + +has_perf_access = check_perf_access() +has_msr_access = check_msr_access() + +turbostat_counter_source_opts = [''] + +if has_msr_access: + turbostat_counter_source_opts.append('--no-perf') +else: + print('SKIP: doesn\'t have MSR access, skipping run with --no-perf') + +if has_perf_access: + turbostat_counter_source_opts.append('--no-msr') +else: + print('SKIP: doesn\'t have perf access, skipping run with --no-msr') + +if not has_msr_access and not has_perf_access: + print('SKIP: No MSR nor perf access detected. Skipping the tests entirely') + exit(0) + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +EXPECTED_COLUMNS_DEBUG_DEFAULT = b'usec\tTime_Of_Day_Seconds\tAPIC\tX2APIC' + +SMI_APERF_MPERF_DEPENDENT_BICS = [ + 'SMI', + 'Avg_MHz', + 'Busy%', + 'Bzy_MHz', +] +if has_perf_access: + SMI_APERF_MPERF_DEPENDENT_BICS.append('IPC') + +for bic in SMI_APERF_MPERF_DEPENDENT_BICS: + for counter_source_opt in turbostat_counter_source_opts: + + # Ugly special case, but it is what it is.. + if counter_source_opt == '--no-perf' and bic == 'IPC': + continue + + expected_columns = bic.encode() + expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + f'\t{bic}'.encode() + + # + # Run turbostat for some time and send SIGINT + # + timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s'] + turbostat_argv = [turbostat, '-i', '0.50', '--show', bic] + + if counter_source_opt: + turbostat_argv.append(counter_source_opt) + + print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) + proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) + if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + + actual_columns = proc_turbostat.stdout.split(b'\n')[0] + if expected_columns != actual_columns: + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) + print('OK') + + # + # Same, but with --debug + # + turbostat_argv.append('--debug') + + print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) + proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) + if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + + actual_columns = proc_turbostat.stdout.split(b'\n')[0] + if expected_columns_debug != actual_columns: + print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}') + exit(1) + print('OK') diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f594a44df840e41a3154bff1f67aa6a816e0d57a..2f756628613dd82d86a22e1e17752c7e3f36c941 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tool->params = params; @@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; } static int stop_tracing; diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index b14e14cdbfb9fd800b45c9d76d3a7a5ea3a44be4..fd6a3010afa833e077623065b80bdbb5b1012250 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM select KVM_PRIVATE_MEM bool -config HAVE_KVM_GMEM_PREPARE +config HAVE_KVM_ARCH_GMEM_PREPARE bool depends on KVM_PRIVATE_MEM -config HAVE_KVM_GMEM_INVALIDATE +config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 229570059a1bb5fc44be96499b17090d13150d4e..992f9beb3e7d0403d7f3a4d780025a5660fdbaa5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -97,18 +97,19 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); /* - * synchronize_srcu(&kvm->irq_srcu) already called + * synchronize_srcu_expedited(&kvm->irq_srcu) already called * in kvm_unregister_irq_ack_notifier(). */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); + } else { + synchronize_srcu_expedited(&kvm->irq_srcu); } mutex_unlock(&kvm->irqfds.resampler_lock); @@ -126,7 +127,7 @@ irqfd_shutdown(struct work_struct *work) u64 cnt; /* Make sure irqfd has been initialized in assign path. */ - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); /* * Synchronize with the wait-queue and unhook ourselves to prevent @@ -384,7 +385,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -523,7 +524,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); kvm_arch_post_irq_ack_notifier_list_update(kvm); } @@ -608,7 +609,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Take note of a change in irq routing. - * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. + * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards. */ void kvm_irq_routing_update(struct kvm *kvm) { diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 1c509c3512614666f6cf592766ca4e5c3a8b9266..8f079a61a56db761116f0c12cc57322cc58d5a4f 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -13,84 +13,93 @@ struct kvm_gmem { struct list_head entry; }; -static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) +/** + * folio_file_pfn - like folio_file_page, but return a pfn. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Return: The pfn for this index. + */ +static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) { -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE - struct list_head *gmem_list = &inode->i_mapping->i_private_list; - struct kvm_gmem *gmem; + return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); +} - list_for_each_entry(gmem, gmem_list, entry) { - struct kvm_memory_slot *slot; - struct kvm *kvm = gmem->kvm; - struct page *page; - kvm_pfn_t pfn; - gfn_t gfn; - int rc; - - if (!kvm_arch_gmem_prepare_needed(kvm)) - continue; - - slot = xa_load(&gmem->bindings, index); - if (!slot) - continue; - - page = folio_file_page(folio, index); - pfn = page_to_pfn(page); - gfn = slot->base_gfn + index - slot->gmem.pgoff; - rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); - if (rc) { - pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", - index, gfn, pfn, rc); - return rc; - } +static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + pgoff_t index, struct folio *folio) +{ +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE + kvm_pfn_t pfn = folio_file_pfn(folio, index); + gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff; + int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio)); + if (rc) { + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", + index, gfn, pfn, rc); + return rc; } - #endif + return 0; } -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) +static inline void kvm_gmem_mark_prepared(struct folio *folio) { - struct folio *folio; + folio_mark_uptodate(folio); +} - /* TODO: Support huge pages. */ - folio = filemap_grab_folio(inode->i_mapping, index); - if (IS_ERR(folio)) - return folio; +/* + * Process @folio, which contains @gfn, so that the guest can use it. + * The folio must be locked and the gfn must be contained in @slot. + * On successful return the guest sees a zero page so as to avoid + * leaking host data and the up-to-date flag is set. + */ +static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, struct folio *folio) +{ + unsigned long nr_pages, i; + pgoff_t index; + int r; + + nr_pages = folio_nr_pages(folio); + for (i = 0; i < nr_pages; i++) + clear_highpage(folio_page(folio, i)); /* - * Use the up-to-date flag to track whether or not the memory has been - * zeroed before being handed off to the guest. There is no backing - * storage for the memory, so the folio will remain up-to-date until - * it's removed. + * Preparing huge folios should always be safe, since it should + * be possible to split them later if needed. * - * TODO: Skip clearing pages when trusted firmware will do it when - * assigning memory to the guest. + * Right now the folio order is always going to be zero, but the + * code is ready for huge folios. The only assumption is that + * the base pgoff of memslots is naturally aligned with the + * requested page order, ensuring that huge folios can also use + * huge page table entries for GPA->HPA mapping. + * + * The order will be passed when creating the guest_memfd, and + * checked when creating memslots. */ - if (!folio_test_uptodate(folio)) { - unsigned long nr_pages = folio_nr_pages(folio); - unsigned long i; - - for (i = 0; i < nr_pages; i++) - clear_highpage(folio_page(folio, i)); - - folio_mark_uptodate(folio); - } + WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); + index = gfn - slot->base_gfn + slot->gmem.pgoff; + index = ALIGN_DOWN(index, 1 << folio_order(folio)); + r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); + if (!r) + kvm_gmem_mark_prepared(folio); - if (prepare) { - int r = kvm_gmem_prepare_folio(inode, index, folio); - if (r < 0) { - folio_unlock(folio); - folio_put(folio); - return ERR_PTR(r); - } - } + return r; +} - /* - * Ignore accessed, referenced, and dirty flags. The memory is - * unevictable and there is no storage to write back to. - */ - return folio; +/* + * Returns a locked folio on success. The caller is responsible for + * setting the up-to-date flag before the memory is mapped into the guest. + * There is no backing storage for the memory, so the folio will remain + * up-to-date until it's removed. + * + * Ignore accessed, referenced, and dirty flags. The memory is + * unevictable and there is no storage to write back to. + */ +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) +{ + /* TODO: Support huge pages. */ + return filemap_grab_folio(inode->i_mapping, index); } static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, @@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index, true); + folio = kvm_gmem_get_folio(inode, index); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol return MF_DELAYED; } -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE static void kvm_gmem_free_folio(struct folio *folio) { struct page *page = folio_page(folio, 0); @@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE .free_folio = kvm_gmem_free_folio, #endif }; @@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) fput(file); } -static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) +/* Returns a locked folio on success. */ +static struct folio * +__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared, + int *max_order) { pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; struct kvm_gmem *gmem = file->private_data; struct folio *folio; - struct page *page; - int r; if (file != slot->gmem.file) { WARN_ON_ONCE(slot->gmem.file); - return -EFAULT; + return ERR_PTR(-EFAULT); } gmem = file->private_data; if (xa_load(&gmem->bindings, index) != slot) { WARN_ON_ONCE(xa_load(&gmem->bindings, index)); - return -EIO; + return ERR_PTR(-EIO); } - folio = kvm_gmem_get_folio(file_inode(file), index, prepare); + folio = kvm_gmem_get_folio(file_inode(file), index); if (IS_ERR(folio)) - return PTR_ERR(folio); + return folio; if (folio_test_hwpoison(folio)) { folio_unlock(folio); folio_put(folio); - return -EHWPOISON; + return ERR_PTR(-EHWPOISON); } - page = folio_file_page(folio, index); - - *pfn = page_to_pfn(page); + *pfn = folio_file_pfn(folio, index); if (max_order) *max_order = 0; - r = 0; - - folio_unlock(folio); - - return r; + *is_prepared = folio_test_uptodate(folio); + return folio; } int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order) { struct file *file = kvm_gmem_get_file(slot); - int r; + struct folio *folio; + bool is_prepared = false; + int r = 0; if (!file) return -EFAULT; - r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); + folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order); + if (IS_ERR(folio)) { + r = PTR_ERR(folio); + goto out; + } + + if (!is_prepared) + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); + + folio_unlock(folio); + if (r < 0) + folio_put(folio); + +out: fput(file); return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { @@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); for (i = 0; i < npages; i += (1 << max_order)) { + struct folio *folio; gfn_t gfn = start_gfn + i; + bool is_prepared = false; kvm_pfn_t pfn; if (signal_pending(current)) { @@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long break; } - ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); - if (ret) + folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; + } - if (!IS_ALIGNED(gfn, (1 << max_order)) || - (npages - i) < (1 << max_order)) - max_order = 0; + if (is_prepared) { + folio_unlock(folio); + folio_put(folio); + ret = -EEXIST; + break; + } + + folio_unlock(folio); + WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || + (npages - i) < (1 << max_order)); + + ret = -EINVAL; + while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), + KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { + if (!max_order) + goto put_folio_and_exit; + max_order--; + } p = src ? src + i * PAGE_SIZE : NULL; ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); + if (!ret) + kvm_gmem_mark_prepared(folio); - put_page(pfn_to_page(pfn)); +put_folio_and_exit: + folio_put(folio); if (ret) break; } @@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long return ret && !i ? ret : i; } EXPORT_SYMBOL_GPL(kvm_gmem_populate); +#endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d0788d0a72cc090c8d4eef33c9b05a9d0a923a43..cb2b78e92910fb1d6ebed0b73cf359e866f0a06b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1578,15 +1578,14 @@ static int check_memory_region_flags(struct kvm *kvm, if (mem->flags & KVM_MEM_GUEST_MEMFD) valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES; -#ifdef CONFIG_HAVE_KVM_READONLY_MEM /* * GUEST_MEMFD is incompatible with read-only memslots, as writes to * read-only memslots have emulated MMIO, not page fault, semantics, * and KVM doesn't allow emulated MMIO for private memory. */ - if (!(mem->flags & KVM_MEM_GUEST_MEMFD)) + if (kvm_arch_has_readonly_mem(kvm) && + !(mem->flags & KVM_MEM_GUEST_MEMFD)) valid_flags |= KVM_MEM_READONLY; -#endif if (mem->flags & ~valid_flags) return -EINVAL; @@ -2398,48 +2397,47 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static u64 kvm_supported_mem_attributes(struct kvm *kvm) +{ + if (!kvm || kvm_arch_has_private_mem(kvm)) + return KVM_MEMORY_ATTRIBUTE_PRIVATE; + + return 0; +} + /* * Returns true if _all_ gfns in the range [@start, @end) have attributes - * matching @attrs. + * such that the bits in @mask match @attrs. */ bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs) + unsigned long mask, unsigned long attrs) { XA_STATE(xas, &kvm->mem_attr_array, start); unsigned long index; - bool has_attrs; void *entry; - rcu_read_lock(); + mask &= kvm_supported_mem_attributes(kvm); + if (attrs & ~mask) + return false; - if (!attrs) { - has_attrs = !xas_find(&xas, end - 1); - goto out; - } + if (end == start + 1) + return (kvm_get_memory_attributes(kvm, start) & mask) == attrs; + + guard(rcu)(); + if (!attrs) + return !xas_find(&xas, end - 1); - has_attrs = true; for (index = start; index < end; index++) { do { entry = xas_next(&xas); } while (xas_retry(&xas, entry)); - if (xas.xa_index != index || xa_to_value(entry) != attrs) { - has_attrs = false; - break; - } + if (xas.xa_index != index || + (xa_to_value(entry) & mask) != attrs) + return false; } -out: - rcu_read_unlock(); - return has_attrs; -} - -static u64 kvm_supported_mem_attributes(struct kvm *kvm) -{ - if (!kvm || kvm_arch_has_private_mem(kvm)) - return KVM_MEMORY_ATTRIBUTE_PRIVATE; - - return 0; + return true; } static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, @@ -2534,7 +2532,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, mutex_lock(&kvm->slots_lock); /* Nothing to do if the entire range as the desired attributes. */ - if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) + if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes)) goto out_unlock; /*